diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index ebf3caccd9c62..dfc6c505c8a1d 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -14,20 +14,53 @@ jobs: build: runs-on: ubuntu-latest strategy: + max-parallel: 8 matrix: include: - - scala: "scala-2.11" - spark: "spark2" - - scala: "scala-2.11" - spark: "spark2,spark-shade-unbundle-avro" - - scala: "scala-2.12" - spark: "spark3.1.x" - - scala: "scala-2.12" - spark: "spark3.1.x,spark-shade-unbundle-avro" - - scala: "scala-2.12" - spark: "spark3" - - scala: "scala-2.12" - spark: "spark3,spark-shade-unbundle-avro" + # Spark 2.4.4, scala 2.11 + - scalaProfile: "scala-2.11" + sparkProfile: "spark2.4" + sparkVersion: "2.4.4" + flinkProfile: "flink1.13" + + # Spark 2.4.4, scala 2.12 + - scalaProfile: "scala-2.12" + sparkProfile: "spark2.4" + sparkVersion: "2.4.4" + flinkProfile: "flink1.14" + + # Spark 3.1.x + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.1" + sparkVersion: "3.1.0" + flinkProfile: "flink1.13" + + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.1" + sparkVersion: "3.1.1" + flinkProfile: "flink1.13" + + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.1" + sparkVersion: "3.1.2" + flinkProfile: "flink1.14" + + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.1" + sparkVersion: "3.1.3" + flinkProfile: "flink1.14" + + # Spark 3.2.x + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.2" + sparkVersion: "3.2.0" + flinkProfile: "flink1.13" + + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.2" + sparkVersion: "3.2.1" + flinkProfile: "flink1.14" + steps: - uses: actions/checkout@v2 - name: Set up JDK 8 @@ -38,6 +71,18 @@ jobs: architecture: x64 - name: Build Project env: - SCALA_PROFILE: ${{ matrix.scala }} - SPARK_PROFILE: ${{ matrix.spark }} - run: mvn install -P "$SCALA_PROFILE,$SPARK_PROFILE" -DskipTests=true -Dmaven.javadoc.skip=true -B -V + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + 
SPARK_VERSION: ${{ matrix.sparkVersion }} + FLINK_PROFILE: ${{ matrix.flinkProfile }} + run: + mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -Pintegration-tests -DskipTests=true -B -V + - name: Quickstart Test + env: + SCALA_PROFILE: ${{ matrix.scalaProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} + SPARK_VERSION: ${{ matrix.sparkVersion }} + FLINK_PROFILE: ${{ matrix.flinkProfile }} + if: ${{ !startsWith(env.SPARK_VERSION, '3.2.') }} # skip test spark 3.2 before hadoop upgrade to 3.x + run: + mvn test -P "unit-tests" -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark diff --git a/README.md b/README.md index 6d3475755ff87..e646463bac992 100644 --- a/README.md +++ b/README.md @@ -70,41 +70,36 @@ To build the Javadoc for all Java and Scala classes: mvn clean javadoc:aggregate -Pjavadocs ``` -### Build with Scala 2.12 +### Build with different Spark versions -The default Scala version supported is 2.11. To build for Scala 2.12 version, build using `scala-2.12` profile +The default Spark version supported is 2.4.4. To build for different Spark versions and Scala 2.12, use the +corresponding profile -``` -mvn clean package -DskipTests -Dscala-2.12 -``` - -### Build with Spark 3 - -The default Spark version supported is 2.4.4. 
To build for different Spark 3 versions, use the corresponding profile +| Label | Artifact Name for Spark Bundle | Maven Profile Option | Notes | +|--|--|--|--| +| Spark 2.4, Scala 2.11 | hudi-spark2.4-bundle_2.11 | `-Pspark2.4` | For Spark 2.4.4, which is the same as the default | +| Spark 2.4, Scala 2.12 | hudi-spark2.4-bundle_2.12 | `-Pspark2.4,scala-2.12` | For Spark 2.4.4, which is the same as the default and Scala 2.12 | +| Spark 3.1, Scala 2.12 | hudi-spark3.1-bundle_2.12 | `-Pspark3.1` | For Spark 3.1.x | +| Spark 3.2, Scala 2.12 | hudi-spark3.2-bundle_2.12 | `-Pspark3.2` | For Spark 3.2.x | +| Spark 3, Scala 2.12 | hudi-spark3-bundle_2.12 | `-Pspark3` | This is the same as `Spark 3.2, Scala 2.12` | +| Spark, Scala 2.11 | hudi-spark-bundle_2.11 | Default | The default profile, supporting Spark 2.4.4 | +| Spark, Scala 2.12 | hudi-spark-bundle_2.12 | `-Pscala-2.12` | The default profile (for Spark 2.4.4) with Scala 2.12 | +For example, ``` -# Build against Spark 3.2.1 (the default build shipped with the public Spark 3 bundle) -mvn clean package -DskipTests -Dspark3 +# Build against Spark 3.2.x (the default build shipped with the public Spark 3 bundle) +mvn clean package -DskipTests -Pspark3.2 -# Build against Spark 3.1.2 -mvn clean package -DskipTests -Dspark3.1.x -``` - -### Build without spark-avro module - -The default hudi-jar bundles spark-avro module. 
To build without spark-avro module, build using `spark-shade-unbundle-avro` profile +# Build against Spark 3.1.x +mvn clean package -DskipTests -Pspark3.1 +# Build against Spark 2.4.4 and Scala 2.12 +mvn clean package -DskipTests -Pspark2.4,scala-2.12 ``` -# Checkout code and build -git clone https://github.com/apache/hudi.git && cd hudi -mvn clean package -DskipTests -Pspark-shade-unbundle-avro -# Start command -spark-2.4.4-bin-hadoop2.7/bin/spark-shell \ - --packages org.apache.spark:spark-avro_2.11:2.4.4 \ - --jars `ls packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-*.*.*-SNAPSHOT.jar` \ - --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' -``` +### What about "spark-avro" module? + +Starting from versions 0.11, Hudi no longer requires `spark-avro` to be specified using `--packages` ## Running Tests diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 397ff9800c4b3..8ca54c1ab39ef 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -22,11 +22,11 @@ pool: vmImage: 'ubuntu-18.04' variables: - MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository - MAVEN_OPTS: '-Dmaven.repo.local=$(MAVEN_CACHE_FOLDER) -Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true' + MAVEN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true' SPARK_VERSION: '2.4.4' HADOOP_VERSION: '2.7' SPARK_ARCHIVE: spark-$(SPARK_VERSION)-bin-hadoop$(HADOOP_VERSION) + EXCLUDE_TESTED_MODULES: '!hudi-examples/hudi-examples-common,!hudi-examples/hudi-examples-flink,!hudi-examples/hudi-examples-java,!hudi-examples/hudi-examples-spark,!hudi-common,!hudi-flink-datasource/hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync' stages: - stage: test @@ -35,23 +35,15 @@ stages: displayName: UT FT common & flink & UT client/spark-client timeoutInMinutes: '90' steps: - - task: Cache@2 - displayName: set cache - inputs: - 
key: 'maven | "$(Agent.OS)" | **/pom.xml' - restoreKeys: | - maven | "$(Agent.OS)" - maven - path: $(MAVEN_CACHE_FOLDER) - task: Maven@3 displayName: maven install inputs: mavenPomFile: 'pom.xml' - goals: 'install' + goals: 'clean install' options: -T 2.5C -DskipTests publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - task: Maven@3 displayName: UT common flink client/spark-client inputs: @@ -60,7 +52,7 @@ stages: options: -Punit-tests -pl hudi-common,hudi-flink-datasource/hudi-flink,hudi-client/hudi-spark-client publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - task: Maven@3 displayName: FT common flink inputs: @@ -69,28 +61,20 @@ stages: options: -Pfunctional-tests -pl hudi-common,hudi-flink-datasource/hudi-flink publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - job: UT_FT_2 displayName: FT client/spark-client timeoutInMinutes: '90' steps: - - task: Cache@2 - displayName: set cache - inputs: - key: 'maven | "$(Agent.OS)" | **/pom.xml' - restoreKeys: | - maven | "$(Agent.OS)" - maven - path: $(MAVEN_CACHE_FOLDER) - task: Maven@3 displayName: maven install inputs: mavenPomFile: 'pom.xml' - goals: 'install' + goals: 'clean install' options: -T 2.5C -DskipTests publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - task: Maven@3 displayName: FT client/spark-client inputs: @@ -99,28 +83,20 @@ stages: options: -Pfunctional-tests -pl hudi-client/hudi-spark-client publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - job: UT_FT_3 displayName: UT FT clients & cli & utilities & sync/hive-sync timeoutInMinutes: '90' steps: - - task: Cache@2 - displayName: set cache - inputs: - key: 
'maven | "$(Agent.OS)" | **/pom.xml' - restoreKeys: | - maven | "$(Agent.OS)" - maven - path: $(MAVEN_CACHE_FOLDER) - task: Maven@3 displayName: maven install inputs: mavenPomFile: 'pom.xml' - goals: 'install' + goals: 'clean install' options: -T 2.5C -DskipTests publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - task: Maven@3 displayName: UT clients & cli & utilities & sync/hive-sync inputs: @@ -129,7 +105,7 @@ stages: options: -Punit-tests -pl hudi-client/hudi-client-common,hudi-client/hudi-flink-client,hudi-client/hudi-java-client,hudi-cli,hudi-utilities,hudi-sync/hudi-hive-sync publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - task: Maven@3 displayName: FT clients & cli & utilities & sync/hive-sync inputs: @@ -138,46 +114,38 @@ stages: options: -Pfunctional-tests -pl hudi-client/hudi-client-common,hudi-client/hudi-flink-client,hudi-client/hudi-java-client,hudi-cli,hudi-utilities,hudi-sync/hudi-hive-sync publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - job: UT_FT_4 displayName: UT FT other modules timeoutInMinutes: '90' steps: - - task: Cache@2 - displayName: set cache - inputs: - key: 'maven | "$(Agent.OS)" | **/pom.xml' - restoreKeys: | - maven | "$(Agent.OS)" - maven - path: $(MAVEN_CACHE_FOLDER) - task: Maven@3 displayName: maven install inputs: mavenPomFile: 'pom.xml' - goals: 'install' + goals: 'clean install' options: -T 2.5C -DskipTests publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - task: Maven@3 displayName: UT other modules inputs: mavenPomFile: 'pom.xml' goals: 'test' - options: -Punit-tests -pl 
!hudi-common,!hudi-flink-datasource/hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync + options: -Punit-tests -pl $(EXCLUDE_TESTED_MODULES) publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - task: Maven@3 displayName: FT other modules inputs: mavenPomFile: 'pom.xml' goals: 'test' - options: -Pfunctional-tests -pl !hudi-common,!hudi-flink-datasource/hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync + options: -Pfunctional-tests -pl $(EXCLUDE_TESTED_MODULES) publishJUnitResults: false jdkVersionOption: '1.8' - mavenOptions: '-Xmx2g $(MAVEN_OPTS)' + mavenOptions: '-Xmx4g $(MAVEN_OPTS)' - job: IT displayName: IT modules timeoutInMinutes: '90' diff --git a/docker/README.md b/docker/README.md index 226775184e84f..0851e9b5b7858 100644 --- a/docker/README.md +++ b/docker/README.md @@ -51,9 +51,19 @@ mvn clean pre-integration-test -DskipTests -Ddocker.compose.skip=true -Ddocker.b mvn clean pre-integration-test -DskipTests -Ddocker.compose.skip=true -Ddocker.build.skip=false -pl :hudi-hadoop-trinobase-docker -am ``` -Alternatively, you can use `docker` cli directly under `hoodie/hadoop`. Note that, you need to manually name your local -image by using `-t` option to match the naming in the `pom.xml`, so that you can update the corresponding image -repository in Docker Hub (detailed steps in the next section). +Alternatively, you can use `docker` cli directly under `hoodie/hadoop` to build images in a faster way. 
If you use this +approach, make sure you first build Hudi modules with `integration-tests` profile as below so that the latest Hudi jars +built are copied to the corresponding Hudi docker folder, e.g., `$HUDI_DIR/docker/hoodie/hadoop/hive_base/target`, which +is required to build each docker image. Otherwise, the `target/` folder can be missing and `docker` cli complains about +that: `failed to compute cache key: "/target" not found: not found`. + +```shell +mvn -Pintegration-tests clean package -DskipTests +``` + +Note that, to build the image with `docker` cli, you need to manually name your local image by using `-t` option to +match the naming in the `pom.xml`, so that you can update the corresponding image repository in Docker Hub (detailed +steps in the next section). ```shell # Run under hoodie/hadoop, the is optional, "latest" by default @@ -82,7 +92,7 @@ docker push apachehudi/hudi-hadoop_2.8.4-trinobase_368 You can also easily push the image to the Docker Hub using Docker Desktop app: go to `Images`, search for the image by the name, and then click on the three dots and `Push to Hub`. -![Push to Docker Hub](push_to_docker_hub.png) +![Push to Docker Hub](images/push_to_docker_hub.png) Note that you need to ask for permission to upload the Hudi Docker Demo images to the repositories. 
diff --git a/docker/demo/config/test-suite/cow-spark-long-running.yaml b/docker/demo/config/test-suite/cow-spark-long-running.yaml index 795a4a5f60709..00fea43f4578e 100644 --- a/docker/demo/config/test-suite/cow-spark-long-running.yaml +++ b/docker/demo/config/test-suite/cow-spark-long-running.yaml @@ -38,7 +38,7 @@ dag_content: first_delete: config: num_partitions_delete: 50 - num_records_delete: 8000 + num_records_delete: 4000 type: SparkDeleteNode deps: first_upsert second_validate: diff --git a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-hive.yaml b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-hive.yaml index 09dd6168c985e..76172203866b0 100644 --- a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-hive.yaml +++ b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-hive.yaml @@ -60,7 +60,7 @@ dag_content: first_delete: config: num_partitions_delete: 50 - num_records_delete: 8000 + num_records_delete: 4000 type: DeleteNode deps: first_upsert second_hive_sync: diff --git a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-metadata.yaml b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-metadata.yaml index b2ab525b1af65..57c8d010080a0 100644 --- a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-metadata.yaml +++ b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-metadata.yaml @@ -54,7 +54,7 @@ dag_content: first_delete: config: num_partitions_delete: 50 - num_records_delete: 8000 + num_records_delete: 4000 type: DeleteNode deps: first_upsert second_validate: diff --git a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml index b8f2b686066c3..a29152bb45431 100644 --- a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml +++ 
b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml @@ -54,7 +54,7 @@ dag_content: first_delete: config: num_partitions_delete: 50 - num_records_delete: 8000 + num_records_delete: 4000 type: DeleteNode deps: first_upsert second_validate: diff --git a/docker/demo/config/test-suite/multi-writer-1-ds.yaml b/docker/demo/config/test-suite/multi-writer-1-ds.yaml new file mode 100644 index 0000000000000..3fe33b671dc39 --- /dev/null +++ b/docker/demo/config/test-suite/multi-writer-1-ds.yaml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+dag_name: simple-deltastreamer.yaml +dag_rounds: 3 +dag_intermittent_delay_mins: 0 +dag_content: + first_insert: + config: + record_size: 5000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 1000 + type: InsertNode + deps: none + second_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 100000 + deps: first_insert + type: InsertNode + third_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 30000 + deps: second_insert + type: InsertNode + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 1 + num_records_insert: 5000 + repeat_count: 1 + num_records_upsert: 50000 + num_partitions_upsert: 1 + type: UpsertNode + deps: third_insert + first_delete: + config: + num_partitions_delete : 0 + num_records_delete: 100000 + type: DeleteNode + deps: first_upsert + second_validate: + config: + validate_hive: false + delete_input_data: true + type: ValidateDatasetNode + deps: first_delete diff --git a/docker/demo/config/test-suite/multi-writer-1.properties b/docker/demo/config/test-suite/multi-writer-1.properties new file mode 100644 index 0000000000000..502a1b771e8cd --- /dev/null +++ b/docker/demo/config/test-suite/multi-writer-1.properties @@ -0,0 +1,58 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +hoodie.insert.shuffle.parallelism=2 +hoodie.upsert.shuffle.parallelism=2 +hoodie.bulkinsert.shuffle.parallelism=2 +hoodie.delete.shuffle.parallelism=2 + +hoodie.metadata.enable=false + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.write.concurrency.mode=optimistic_concurrency_control +hoodie.cleaner.policy.failed.writes=LAZY +hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider +hoodie.write.lock.zookeeper.url=zookeeper:2181 +hoodie.write.lock.zookeeper.port=2181 +hoodie.write.lock.zookeeper.lock_key=locks +hoodie.write.lock.zookeeper.base_path=/tmp/.locks + +hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input1 +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb 
+hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false +hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/multi-writer-2-sds.yaml b/docker/demo/config/test-suite/multi-writer-2-sds.yaml new file mode 100644 index 0000000000000..9242dd26051ec --- /dev/null +++ b/docker/demo/config/test-suite/multi-writer-2-sds.yaml @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+dag_name: cow-spark-simple.yaml +dag_rounds: 3 +dag_intermittent_delay_mins: 0 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 100000 + start_partition: 10 + type: SparkInsertNode + deps: none + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 1 + num_records_insert: 50000 + repeat_count: 1 + num_records_upsert: 50000 + num_partitions_upsert: 1 + start_partition: 10 + type: SparkUpsertNode + deps: first_insert + first_delete: + config: + num_partitions_delete: 0 + num_records_delete: 10000 + start_partition: 10 + type: SparkDeleteNode + deps: first_upsert + second_validate: + config: + validate_hive: false + delete_input_data: true + type: ValidateDatasetNode + deps: first_delete \ No newline at end of file diff --git a/docker/demo/config/test-suite/multi-writer-2.properties b/docker/demo/config/test-suite/multi-writer-2.properties new file mode 100644 index 0000000000000..80db8912b5406 --- /dev/null +++ b/docker/demo/config/test-suite/multi-writer-2.properties @@ -0,0 +1,58 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +hoodie.insert.shuffle.parallelism=2 +hoodie.upsert.shuffle.parallelism=2 +hoodie.bulkinsert.shuffle.parallelism=2 +hoodie.delete.shuffle.parallelism=2 + +hoodie.metadata.enable=false + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.write.concurrency.mode=optimistic_concurrency_control +hoodie.cleaner.policy.failed.writes=LAZY +hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider +hoodie.write.lock.zookeeper.url=zookeeper:2181 +hoodie.write.lock.zookeeper.port=2181 +hoodie.write.lock.zookeeper.lock_key=locks +hoodie.write.lock.zookeeper.base_path=/tmp/.locks + +hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input2 +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb +hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false 
+hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/multi-writer-local-1.properties b/docker/demo/config/test-suite/multi-writer-local-1.properties new file mode 100644 index 0000000000000..be16f91c17459 --- /dev/null +++ b/docker/demo/config/test-suite/multi-writer-local-1.properties @@ -0,0 +1,57 @@ + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +hoodie.insert.shuffle.parallelism=2 +hoodie.upsert.shuffle.parallelism=2 +hoodie.bulkinsert.shuffle.parallelism=2 +hoodie.delete.shuffle.parallelism=2 + +hoodie.metadata.enable=false + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.write.concurrency.mode=optimistic_concurrency_control +hoodie.cleaner.policy.failed.writes=LAZY +hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider + +hoodie.deltastreamer.source.dfs.root=/tmp/hudi/input1 +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/tmp/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/tmp/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb +hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false +hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/multi-writer-local-2.properties b/docker/demo/config/test-suite/multi-writer-local-2.properties new file 
mode 100644 index 0000000000000..08f294ce1461e --- /dev/null +++ b/docker/demo/config/test-suite/multi-writer-local-2.properties @@ -0,0 +1,57 @@ + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +hoodie.insert.shuffle.parallelism=2 +hoodie.upsert.shuffle.parallelism=2 +hoodie.bulkinsert.shuffle.parallelism=2 +hoodie.delete.shuffle.parallelism=2 + +hoodie.metadata.enable=false + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.write.concurrency.mode=optimistic_concurrency_control +hoodie.cleaner.policy.failed.writes=LAZY 
+hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider + +hoodie.deltastreamer.source.dfs.root=/tmp/hudi/input2 +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/tmp/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/tmp/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb +hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false +hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/simple-clustering-hive.yaml b/docker/demo/config/test-suite/simple-clustering-hive.yaml index e1f79bfe93c0f..1127bd02b93e8 100644 --- a/docker/demo/config/test-suite/simple-clustering-hive.yaml +++ b/docker/demo/config/test-suite/simple-clustering-hive.yaml @@ -44,7 +44,7 @@ dag_content: first_delete: config: num_partitions_delete: 1 - num_records_delete: 9000 + num_records_delete: 3000 type: DeleteNode deps: third_insert first_hive_sync: diff --git a/docker/demo/config/test-suite/simple-clustering.yaml b/docker/demo/config/test-suite/simple-clustering.yaml index 01849bb6436ea..4ede6394cf752 100644 --- a/docker/demo/config/test-suite/simple-clustering.yaml +++ b/docker/demo/config/test-suite/simple-clustering.yaml @@ -44,7 +44,7 @@ dag_content: first_delete: config: num_partitions_delete: 1 - num_records_delete: 9000 + num_records_delete: 3000 type: DeleteNode deps: third_insert first_validate: diff --git a/docker/demo/config/test-suite/spark-long-running.yaml b/docker/demo/config/test-suite/spark-long-running.yaml new file mode 100644 index 0000000000000..00fea43f4578e --- /dev/null +++ 
b/docker/demo/config/test-suite/spark-long-running.yaml @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +dag_name: cow-spark-deltastreamer-long-running-multi-partitions.yaml +dag_rounds: 30 +dag_intermittent_delay_mins: 0 +dag_content: + first_insert: + config: + record_size: 200 + num_partitions_insert: 50 + repeat_count: 1 + num_records_insert: 10000 + type: SparkInsertNode + deps: none + first_upsert: + config: + record_size: 200 + num_partitions_insert: 50 + num_records_insert: 300 + repeat_count: 1 + num_records_upsert: 3000 + num_partitions_upsert: 50 + type: SparkUpsertNode + deps: first_insert + first_delete: + config: + num_partitions_delete: 50 + num_records_delete: 4000 + type: SparkDeleteNode + deps: first_upsert + second_validate: + config: + validate_once_every_itr : 5 + validate_hive: false + delete_input_data: true + type: ValidateDatasetNode + deps: first_delete + last_validate: + config: + execute_itr_count: 30 + type: ValidateAsyncOperations + deps: second_validate diff --git a/docker/demo/config/test-suite/spark-simple.yaml b/docker/demo/config/test-suite/spark-simple.yaml new file mode 100644 index 0000000000000..192adcf377dc0 --- /dev/null +++ b/docker/demo/config/test-suite/spark-simple.yaml @@ 
-0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +dag_name: cow-spark-simple.yaml +dag_rounds: 1 +dag_intermittent_delay_mins: 1 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 100 + type: SparkInsertNode + deps: none + first_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: first_insert + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 1 + num_records_insert: 50 + repeat_count: 1 + num_records_upsert: 100 + num_partitions_upsert: 1 + type: SparkUpsertNode + deps: first_validate + first_delete: + config: + num_partitions_delete: 1 + num_records_delete: 30 + type: SparkDeleteNode + deps: first_upsert + second_validate: + config: + validate_hive: false + delete_input_data: false + type: ValidateDatasetNode + deps: first_delete \ No newline at end of file diff --git a/docker/demo/config/test-suite/templates/spark_command.txt.template b/docker/demo/config/test-suite/templates/spark_command.txt.template index 563d98b7fb6c3..bf19631b0f427 100644 --- a/docker/demo/config/test-suite/templates/spark_command.txt.template +++ b/docker/demo/config/test-suite/templates/spark_command.txt.template @@ -15,7 
+15,6 @@ # limitations under the License. spark-submit \ ---packages org.apache.spark:spark-avro_2.11:2.4.0 \ --conf spark.task.cpus=1 \ --conf spark.executor.cores=1 \ --conf spark.task.maxFailures=100 \ diff --git a/docker/demo/config/test-suite/test-aggressive-clean-archival.properties b/docker/demo/config/test-suite/test-aggressive-clean-archival.properties index dcbbfb31c9936..159c1f233185c 100644 --- a/docker/demo/config/test-suite/test-aggressive-clean-archival.properties +++ b/docker/demo/config/test-suite/test-aggressive-clean-archival.properties @@ -22,9 +22,9 @@ hoodie.insert.shuffle.parallelism=100 hoodie.upsert.shuffle.parallelism=100 hoodie.bulkinsert.shuffle.parallelism=100 -hoodie.cleaner.commits.retained=5 -hoodie.keep.min.commits=9 -hoodie.keep.max.commits=10 +hoodie.cleaner.commits.retained=8 +hoodie.keep.min.commits=12 +hoodie.keep.max.commits=14 hoodie.deltastreamer.source.test.num_partitions=100 hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false diff --git a/docker/demo/config/test-suite/test-clustering-aggressive-clean-archival.properties b/docker/demo/config/test-suite/test-clustering-aggressive-clean-archival.properties index abddd77ba327a..d079536f95363 100644 --- a/docker/demo/config/test-suite/test-clustering-aggressive-clean-archival.properties +++ b/docker/demo/config/test-suite/test-clustering-aggressive-clean-archival.properties @@ -22,9 +22,9 @@ hoodie.insert.shuffle.parallelism=100 hoodie.upsert.shuffle.parallelism=100 hoodie.bulkinsert.shuffle.parallelism=100 -hoodie.cleaner.commits.retained=5 -hoodie.keep.min.commits=9 -hoodie.keep.max.commits=10 +hoodie.cleaner.commits.retained=8 +hoodie.keep.min.commits=12 +hoodie.keep.max.commits=14 hoodie.deltastreamer.source.test.num_partitions=100 hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false diff --git a/docker/demo/config/test-suite/test-clustering-metadata-aggressive-clean-archival.properties 
b/docker/demo/config/test-suite/test-clustering-metadata-aggressive-clean-archival.properties index 931b1e3a09668..23b95f430408d 100644 --- a/docker/demo/config/test-suite/test-clustering-metadata-aggressive-clean-archival.properties +++ b/docker/demo/config/test-suite/test-clustering-metadata-aggressive-clean-archival.properties @@ -22,9 +22,9 @@ hoodie.insert.shuffle.parallelism=100 hoodie.upsert.shuffle.parallelism=100 hoodie.bulkinsert.shuffle.parallelism=100 -hoodie.cleaner.commits.retained=5 -hoodie.keep.min.commits=9 -hoodie.keep.max.commits=10 +hoodie.cleaner.commits.retained=8 +hoodie.keep.min.commits=12 +hoodie.keep.max.commits=14 hoodie.metadata.enable=true diff --git a/docker/demo/config/test-suite/test-metadata-aggressive-clean-archival.properties b/docker/demo/config/test-suite/test-metadata-aggressive-clean-archival.properties index 8935ffb4264be..160da83004f44 100644 --- a/docker/demo/config/test-suite/test-metadata-aggressive-clean-archival.properties +++ b/docker/demo/config/test-suite/test-metadata-aggressive-clean-archival.properties @@ -22,9 +22,9 @@ hoodie.insert.shuffle.parallelism=100 hoodie.upsert.shuffle.parallelism=100 hoodie.bulkinsert.shuffle.parallelism=100 -hoodie.cleaner.commits.retained=5 -hoodie.keep.min.commits=9 -hoodie.keep.max.commits=10 +hoodie.cleaner.commits.retained=8 +hoodie.keep.min.commits=12 +hoodie.keep.max.commits=14 hoodie.metadata.enable=true diff --git a/docker/hoodie/hadoop/base/pom.xml b/docker/hoodie/hadoop/base/pom.xml index bb0ec788e0f3a..4b1b024958121 100644 --- a/docker/hoodie/hadoop/base/pom.xml +++ b/docker/hoodie/hadoop/base/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/base_java11/pom.xml b/docker/hoodie/hadoop/base_java11/pom.xml index b5d5a3ec16b94..a1181b53e07a0 100644 --- a/docker/hoodie/hadoop/base_java11/pom.xml +++ b/docker/hoodie/hadoop/base_java11/pom.xml @@ -20,7 +20,7 @@ hudi-hadoop-docker 
org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/datanode/pom.xml b/docker/hoodie/hadoop/datanode/pom.xml index e9b4e4fbac6a7..9fb5e222d3f2e 100644 --- a/docker/hoodie/hadoop/datanode/pom.xml +++ b/docker/hoodie/hadoop/datanode/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/historyserver/pom.xml b/docker/hoodie/hadoop/historyserver/pom.xml index dbdb7182f6827..b439a88fbd2e6 100644 --- a/docker/hoodie/hadoop/historyserver/pom.xml +++ b/docker/hoodie/hadoop/historyserver/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/hive_base/pom.xml b/docker/hoodie/hadoop/hive_base/pom.xml index e267e8487e79c..ff73965e02c7b 100644 --- a/docker/hoodie/hadoop/hive_base/pom.xml +++ b/docker/hoodie/hadoop/hive_base/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/namenode/pom.xml b/docker/hoodie/hadoop/namenode/pom.xml index 71998198f53c3..9ad31d9161d1c 100644 --- a/docker/hoodie/hadoop/namenode/pom.xml +++ b/docker/hoodie/hadoop/namenode/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/pom.xml b/docker/hoodie/hadoop/pom.xml index b029abafa83e5..3f4a0183d80f8 100644 --- a/docker/hoodie/hadoop/pom.xml +++ b/docker/hoodie/hadoop/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../../pom.xml 4.0.0 diff --git a/docker/hoodie/hadoop/prestobase/pom.xml b/docker/hoodie/hadoop/prestobase/pom.xml index 98502aa0fe2ff..2ff23833599b2 100644 --- a/docker/hoodie/hadoop/prestobase/pom.xml +++ b/docker/hoodie/hadoop/prestobase/pom.xml @@ -20,7 +20,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git 
a/docker/hoodie/hadoop/spark_base/pom.xml b/docker/hoodie/hadoop/spark_base/pom.xml index 8cd1ee2dea3c6..8c7e8ac241c9d 100644 --- a/docker/hoodie/hadoop/spark_base/pom.xml +++ b/docker/hoodie/hadoop/spark_base/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/sparkadhoc/pom.xml b/docker/hoodie/hadoop/sparkadhoc/pom.xml index e554313ace23e..d61eb170f67b0 100644 --- a/docker/hoodie/hadoop/sparkadhoc/pom.xml +++ b/docker/hoodie/hadoop/sparkadhoc/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/sparkmaster/pom.xml b/docker/hoodie/hadoop/sparkmaster/pom.xml index 3a8dabc4afc3c..40fb732cafe6a 100644 --- a/docker/hoodie/hadoop/sparkmaster/pom.xml +++ b/docker/hoodie/hadoop/sparkmaster/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/sparkworker/pom.xml b/docker/hoodie/hadoop/sparkworker/pom.xml index 6a79f8debc161..3304a2e5a7e6e 100644 --- a/docker/hoodie/hadoop/sparkworker/pom.xml +++ b/docker/hoodie/hadoop/sparkworker/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/trinobase/pom.xml b/docker/hoodie/hadoop/trinobase/pom.xml index 5a290556e495b..180a977915e91 100644 --- a/docker/hoodie/hadoop/trinobase/pom.xml +++ b/docker/hoodie/hadoop/trinobase/pom.xml @@ -22,7 +22,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/trinocoordinator/pom.xml b/docker/hoodie/hadoop/trinocoordinator/pom.xml index 564a2083e7dd9..aa199bad54db5 100644 --- a/docker/hoodie/hadoop/trinocoordinator/pom.xml +++ b/docker/hoodie/hadoop/trinocoordinator/pom.xml @@ -22,7 +22,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git 
a/docker/hoodie/hadoop/trinoworker/pom.xml b/docker/hoodie/hadoop/trinoworker/pom.xml index 54f56b9e11309..8825093846434 100644 --- a/docker/hoodie/hadoop/trinoworker/pom.xml +++ b/docker/hoodie/hadoop/trinoworker/pom.xml @@ -22,7 +22,7 @@ hudi-hadoop-docker org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 pom diff --git a/docker/push_to_docker_hub.png b/docker/images/push_to_docker_hub.png similarity index 100% rename from docker/push_to_docker_hub.png rename to docker/images/push_to_docker_hub.png diff --git a/hudi-aws/pom.xml b/hudi-aws/pom.xml index d44a389a61f66..2429e47943812 100644 --- a/hudi-aws/pom.xml +++ b/hudi-aws/pom.xml @@ -19,12 +19,12 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-aws - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-aws jar @@ -40,6 +40,11 @@ hudi-common ${project.version} + + org.apache.hudi + hudi-hive-sync + ${project.version} + @@ -75,6 +80,28 @@ ${dynamodb.lockclient.version} + + + ${hive.groupid} + hive-service + ${hive.version} + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + + org.apache.parquet + parquet-avro + + com.amazonaws @@ -103,6 +130,12 @@ io.dropwizard.metrics metrics-core + + + com.amazonaws + aws-java-sdk-glue + ${aws.sdk.version} + diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java new file mode 100644 index 0000000000000..97e47deed8173 --- /dev/null +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java @@ -0,0 +1,479 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.aws.sync; + +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.util.CollectionUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.hive.AbstractHiveSyncHoodieClient; +import org.apache.hudi.hive.HiveSyncConfig; +import org.apache.hudi.sync.common.model.Partition; + +import com.amazonaws.services.glue.AWSGlue; +import com.amazonaws.services.glue.AWSGlueClientBuilder; +import com.amazonaws.services.glue.model.AlreadyExistsException; +import com.amazonaws.services.glue.model.BatchCreatePartitionRequest; +import com.amazonaws.services.glue.model.BatchCreatePartitionResult; +import com.amazonaws.services.glue.model.BatchUpdatePartitionRequest; +import com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry; +import com.amazonaws.services.glue.model.BatchUpdatePartitionResult; +import com.amazonaws.services.glue.model.Column; +import com.amazonaws.services.glue.model.CreateDatabaseRequest; +import com.amazonaws.services.glue.model.CreateDatabaseResult; +import com.amazonaws.services.glue.model.CreateTableRequest; +import com.amazonaws.services.glue.model.CreateTableResult; +import com.amazonaws.services.glue.model.DatabaseInput; +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.GetDatabaseRequest; +import com.amazonaws.services.glue.model.GetPartitionsRequest; +import com.amazonaws.services.glue.model.GetPartitionsResult; +import com.amazonaws.services.glue.model.GetTableRequest; +import 
com.amazonaws.services.glue.model.PartitionInput; +import com.amazonaws.services.glue.model.SerDeInfo; +import com.amazonaws.services.glue.model.StorageDescriptor; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.TableInput; +import com.amazonaws.services.glue.model.UpdateTableRequest; +import org.apache.avro.Schema; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.parquet.schema.MessageType; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import static org.apache.hudi.aws.utils.S3Utils.s3aToS3; +import static org.apache.hudi.common.util.MapUtils.nonEmpty; +import static org.apache.hudi.hive.util.HiveSchemaUtil.getPartitionKeyType; +import static org.apache.hudi.hive.util.HiveSchemaUtil.parquetSchemaToMapSchema; +import static org.apache.hudi.sync.common.util.TableUtils.tableId; + +/** + * This class implements all the AWS APIs to enable syncing of a Hudi Table with the + * AWS Glue Data Catalog (https://docs.aws.amazon.com/glue/latest/dg/populate-data-catalog.html). 
+ */ +public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient { + + private static final Logger LOG = LogManager.getLogger(AWSGlueCatalogSyncClient.class); + private static final int MAX_PARTITIONS_PER_REQUEST = 100; + private static final long BATCH_REQUEST_SLEEP_MILLIS = 1000L; + private final AWSGlue awsGlue; + private final String databaseName; + + public AWSGlueCatalogSyncClient(HiveSyncConfig syncConfig, Configuration hadoopConf, FileSystem fs) { + super(syncConfig, hadoopConf, fs); + this.awsGlue = AWSGlueClientBuilder.standard().build(); + this.databaseName = syncConfig.databaseName; + } + + @Override + public List getAllPartitions(String tableName) { + try { + GetPartitionsRequest request = new GetPartitionsRequest(); + request.withDatabaseName(databaseName).withTableName(tableName); + GetPartitionsResult result = awsGlue.getPartitions(request); + return result.getPartitions() + .stream() + .map(p -> new Partition(p.getValues(), p.getStorageDescriptor().getLocation())) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new HoodieGlueSyncException("Failed to get all partitions for table " + tableId(databaseName, tableName), e); + } + } + + @Override + public void addPartitionsToTable(String tableName, List partitionsToAdd) { + if (partitionsToAdd.isEmpty()) { + LOG.info("No partitions to add for " + tableId(databaseName, tableName)); + return; + } + LOG.info("Adding " + partitionsToAdd.size() + " partition(s) in table " + tableId(databaseName, tableName)); + try { + Table table = getTable(awsGlue, databaseName, tableName); + StorageDescriptor sd = table.getStorageDescriptor(); + List partitionInputs = partitionsToAdd.stream().map(partition -> { + StorageDescriptor partitionSd = sd.clone(); + String fullPartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition).toString(); + List partitionValues = partitionValueExtractor.extractPartitionValuesInPath(partition); + partitionSd.setLocation(fullPartitionPath); + 
return new PartitionInput().withValues(partitionValues).withStorageDescriptor(partitionSd); + }).collect(Collectors.toList()); + + for (List batch : CollectionUtils.batches(partitionInputs, MAX_PARTITIONS_PER_REQUEST)) { + BatchCreatePartitionRequest request = new BatchCreatePartitionRequest(); + request.withDatabaseName(databaseName).withTableName(tableName).withPartitionInputList(batch); + + BatchCreatePartitionResult result = awsGlue.batchCreatePartition(request); + if (CollectionUtils.nonEmpty(result.getErrors())) { + throw new HoodieGlueSyncException("Fail to add partitions to " + tableId(databaseName, tableName) + + " with error(s): " + result.getErrors()); + } + Thread.sleep(BATCH_REQUEST_SLEEP_MILLIS); + } + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to add partitions to " + tableId(databaseName, tableName), e); + } + } + + @Override + public void updatePartitionsToTable(String tableName, List changedPartitions) { + if (changedPartitions.isEmpty()) { + LOG.info("No partitions to change for " + tableName); + return; + } + LOG.info("Updating " + changedPartitions.size() + "partition(s) in table " + tableId(databaseName, tableName)); + try { + Table table = getTable(awsGlue, databaseName, tableName); + StorageDescriptor sd = table.getStorageDescriptor(); + List updatePartitionEntries = changedPartitions.stream().map(partition -> { + StorageDescriptor partitionSd = sd.clone(); + String fullPartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition).toString(); + List partitionValues = partitionValueExtractor.extractPartitionValuesInPath(partition); + sd.setLocation(fullPartitionPath); + PartitionInput partitionInput = new PartitionInput().withValues(partitionValues).withStorageDescriptor(partitionSd); + return new BatchUpdatePartitionRequestEntry().withPartitionInput(partitionInput).withPartitionValueList(partitionValues); + }).collect(Collectors.toList()); + + for (List batch : CollectionUtils.batches(updatePartitionEntries, 
MAX_PARTITIONS_PER_REQUEST)) { + BatchUpdatePartitionRequest request = new BatchUpdatePartitionRequest(); + request.withDatabaseName(databaseName).withTableName(tableName).withEntries(batch); + + BatchUpdatePartitionResult result = awsGlue.batchUpdatePartition(request); + if (CollectionUtils.nonEmpty(result.getErrors())) { + throw new HoodieGlueSyncException("Fail to update partitions to " + tableId(databaseName, tableName) + + " with error(s): " + result.getErrors()); + } + Thread.sleep(BATCH_REQUEST_SLEEP_MILLIS); + } + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to update partitions to " + tableId(databaseName, tableName), e); + } + } + + @Override + public void dropPartitions(String tableName, List partitionsToDrop) { + throw new UnsupportedOperationException("Not support dropPartitionsToTable yet."); + } + + /** + * Update the table properties to the table. + */ + @Override + public void updateTableProperties(String tableName, Map tableProperties) { + if (nonEmpty(tableProperties)) { + return; + } + try { + updateTableParameters(awsGlue, databaseName, tableName, tableProperties, true); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to update properties for table " + tableId(databaseName, tableName), e); + } + } + + @Override + public void updateTableDefinition(String tableName, MessageType newSchema) { + // ToDo Cascade is set in Hive meta sync, but need to investigate how to configure it for Glue meta + boolean cascade = syncConfig.partitionFields.size() > 0; + try { + Table table = getTable(awsGlue, databaseName, tableName); + Map newSchemaMap = parquetSchemaToMapSchema(newSchema, syncConfig.supportTimestamp, false); + List newColumns = newSchemaMap.keySet().stream().map(key -> { + String keyType = getPartitionKeyType(newSchemaMap, key); + return new Column().withName(key).withType(keyType.toLowerCase()).withComment(""); + }).collect(Collectors.toList()); + StorageDescriptor sd = table.getStorageDescriptor(); + 
sd.setColumns(newColumns); + + final Date now = new Date(); + TableInput updatedTableInput = new TableInput() + .withName(tableName) + .withTableType(table.getTableType()) + .withParameters(table.getParameters()) + .withPartitionKeys(table.getPartitionKeys()) + .withStorageDescriptor(sd) + .withLastAccessTime(now) + .withLastAnalyzedTime(now); + + UpdateTableRequest request = new UpdateTableRequest() + .withDatabaseName(databaseName) + .withTableInput(updatedTableInput); + + awsGlue.updateTable(request); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to update definition for table " + tableId(databaseName, tableName), e); + } + } + + @Override + public List getTableCommentUsingMetastoreClient(String tableName) { + throw new UnsupportedOperationException("Not supported: `getTableCommentUsingMetastoreClient`"); + } + + @Override + public void updateTableComments(String tableName, List oldSchema, List newSchema) { + throw new UnsupportedOperationException("Not supported: `updateTableComments`"); + } + + @Override + public void updateTableComments(String tableName, List oldSchema, Map newComments) { + throw new UnsupportedOperationException("Not supported: `updateTableComments`"); + } + + @Override + public void createTable(String tableName, + MessageType storageSchema, + String inputFormatClass, + String outputFormatClass, + String serdeClass, + Map serdeProperties, + Map tableProperties) { + if (tableExists(tableName)) { + return; + } + CreateTableRequest request = new CreateTableRequest(); + Map params = new HashMap<>(); + if (!syncConfig.createManagedTable) { + params.put("EXTERNAL", "TRUE"); + } + params.putAll(tableProperties); + + try { + Map mapSchema = parquetSchemaToMapSchema(storageSchema, syncConfig.supportTimestamp, false); + + List schemaPartitionKeys = new ArrayList<>(); + List schemaWithoutPartitionKeys = new ArrayList<>(); + for (String key : mapSchema.keySet()) { + String keyType = getPartitionKeyType(mapSchema, key); + Column 
column = new Column().withName(key).withType(keyType.toLowerCase()).withComment(""); + // In Glue, the full schema should exclude the partition keys + if (syncConfig.partitionFields.contains(key)) { + schemaPartitionKeys.add(column); + } else { + schemaWithoutPartitionKeys.add(column); + } + } + + StorageDescriptor storageDescriptor = new StorageDescriptor(); + serdeProperties.put("serialization.format", "1"); + storageDescriptor + .withSerdeInfo(new SerDeInfo().withSerializationLibrary(serdeClass).withParameters(serdeProperties)) + .withLocation(s3aToS3(syncConfig.basePath)) + .withInputFormat(inputFormatClass) + .withOutputFormat(outputFormatClass) + .withColumns(schemaWithoutPartitionKeys); + + final Date now = new Date(); + TableInput tableInput = new TableInput() + .withName(tableName) + .withTableType(TableType.EXTERNAL_TABLE.toString()) + .withParameters(params) + .withPartitionKeys(schemaPartitionKeys) + .withStorageDescriptor(storageDescriptor) + .withLastAccessTime(now) + .withLastAnalyzedTime(now); + request.withDatabaseName(databaseName) + .withTableInput(tableInput); + + CreateTableResult result = awsGlue.createTable(request); + LOG.info("Created table " + tableId(databaseName, tableName) + " : " + result); + } catch (AlreadyExistsException e) { + LOG.warn("Table " + tableId(databaseName, tableName) + " already exists.", e); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to create " + tableId(databaseName, tableName), e); + } + } + + @Override + public Map getTableSchema(String tableName) { + try { + // GlueMetastoreClient returns partition keys separate from Columns, hence get both and merge to + // get the Schema of the table. 
+ Table table = getTable(awsGlue, databaseName, tableName); + Map partitionKeysMap = + table.getPartitionKeys().stream().collect(Collectors.toMap(Column::getName, f -> f.getType().toUpperCase())); + + Map columnsMap = + table.getStorageDescriptor().getColumns().stream().collect(Collectors.toMap(Column::getName, f -> f.getType().toUpperCase())); + + Map schema = new HashMap<>(); + schema.putAll(columnsMap); + schema.putAll(partitionKeysMap); + return schema; + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to get schema for table " + tableId(databaseName, tableName), e); + } + } + + @Override + public boolean doesTableExist(String tableName) { + return tableExists(tableName); + } + + @Override + public boolean tableExists(String tableName) { + GetTableRequest request = new GetTableRequest() + .withDatabaseName(databaseName) + .withName(tableName); + try { + return Objects.nonNull(awsGlue.getTable(request).getTable()); + } catch (EntityNotFoundException e) { + LOG.info("Table not found: " + tableId(databaseName, tableName), e); + return false; + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to get table: " + tableId(databaseName, tableName), e); + } + } + + @Override + public boolean databaseExists(String databaseName) { + GetDatabaseRequest request = new GetDatabaseRequest(); + request.setName(databaseName); + try { + return Objects.nonNull(awsGlue.getDatabase(request).getDatabase()); + } catch (EntityNotFoundException e) { + LOG.info("Database not found: " + databaseName, e); + return false; + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to check if database exists " + databaseName, e); + } + } + + @Override + public void createDatabase(String databaseName) { + if (databaseExists(databaseName)) { + return; + } + CreateDatabaseRequest request = new CreateDatabaseRequest(); + request.setDatabaseInput(new DatabaseInput() + .withName(databaseName) + .withDescription("Automatically created by " + 
this.getClass().getName()) + .withParameters(null) + .withLocationUri(null)); + try { + CreateDatabaseResult result = awsGlue.createDatabase(request); + LOG.info("Successfully created database in AWS Glue: " + result.toString()); + } catch (AlreadyExistsException e) { + LOG.warn("AWS Glue Database " + databaseName + " already exists", e); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to create database " + databaseName, e); + } + } + + @Override + public Option getLastCommitTimeSynced(String tableName) { + try { + Table table = getTable(awsGlue, databaseName, tableName); + return Option.of(table.getParameters().getOrDefault(HOODIE_LAST_COMMIT_TIME_SYNC, null)); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to get last sync commit time for " + tableId(databaseName, tableName), e); + } + } + + @Override + public void close() { + awsGlue.shutdown(); + } + + @Override + public void updateLastCommitTimeSynced(String tableName) { + if (!activeTimeline.lastInstant().isPresent()) { + LOG.warn("No commit in active timeline."); + return; + } + final String lastCommitTimestamp = activeTimeline.lastInstant().get().getTimestamp(); + try { + updateTableParameters(awsGlue, databaseName, tableName, Collections.singletonMap(HOODIE_LAST_COMMIT_TIME_SYNC, lastCommitTimestamp), false); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to update last sync commit time for " + tableId(databaseName, tableName), e); + } + } + + @Override + public Option getLastReplicatedTime(String tableName) { + throw new UnsupportedOperationException("Not supported: `getLastReplicatedTime`"); + } + + @Override + public void updateLastReplicatedTimeStamp(String tableName, String timeStamp) { + throw new UnsupportedOperationException("Not supported: `updateLastReplicatedTimeStamp`"); + } + + @Override + public void deleteLastReplicatedTimeStamp(String tableName) { + throw new UnsupportedOperationException("Not supported: 
`deleteLastReplicatedTimeStamp`"); + } + + private enum TableType { + MANAGED_TABLE, + EXTERNAL_TABLE, + VIRTUAL_VIEW, + INDEX_TABLE, + MATERIALIZED_VIEW + } + + private static Table getTable(AWSGlue awsGlue, String databaseName, String tableName) throws HoodieGlueSyncException { + GetTableRequest request = new GetTableRequest() + .withDatabaseName(databaseName) + .withName(tableName); + try { + return awsGlue.getTable(request).getTable(); + } catch (EntityNotFoundException e) { + throw new HoodieGlueSyncException("Table not found: " + tableId(databaseName, tableName), e); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to get table " + tableId(databaseName, tableName), e); + } + } + + private static void updateTableParameters(AWSGlue awsGlue, String databaseName, String tableName, Map updatingParams, boolean shouldReplace) { + final Map newParams = new HashMap<>(); + try { + Table table = getTable(awsGlue, databaseName, tableName); + if (!shouldReplace) { + newParams.putAll(table.getParameters()); + } + newParams.putAll(updatingParams); + + final Date now = new Date(); + TableInput updatedTableInput = new TableInput() + .withName(tableName) + .withTableType(table.getTableType()) + .withParameters(newParams) + .withPartitionKeys(table.getPartitionKeys()) + .withStorageDescriptor(table.getStorageDescriptor()) + .withLastAccessTime(now) + .withLastAnalyzedTime(now); + + UpdateTableRequest request = new UpdateTableRequest(); + request.withDatabaseName(databaseName) + .withTableInput(updatedTableInput); + awsGlue.updateTable(request); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to update params for table " + tableId(databaseName, tableName) + ": " + newParams, e); + } + } +} diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AwsGlueCatalogSyncTool.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AwsGlueCatalogSyncTool.java new file mode 100644 index 0000000000000..bb1be377c9cae --- /dev/null +++ 
b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AwsGlueCatalogSyncTool.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.aws.sync; + +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.hive.HiveSyncConfig; +import org.apache.hudi.hive.HiveSyncTool; + +import com.beust.jcommander.JCommander; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.conf.HiveConf; + +/** + * Currently Experimental. Utility class that implements syncing a Hudi Table with the + * AWS Glue Data Catalog (https://docs.aws.amazon.com/glue/latest/dg/populate-data-catalog.html) + * to enable querying via Glue ETLs, Athena etc. + * + * Extends HiveSyncTool since most logic is similar to Hive syncing, + * except using a different client {@link AWSGlueCatalogSyncClient} that implements + * the necessary functionality using Glue APIs.
+ * + * @Experimental + */ +public class AwsGlueCatalogSyncTool extends HiveSyncTool { + + public AwsGlueCatalogSyncTool(TypedProperties props, Configuration conf, FileSystem fs) { + super(props, new HiveConf(conf, HiveConf.class), fs); + } + + public AwsGlueCatalogSyncTool(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf, FileSystem fs) { + super(hiveSyncConfig, hiveConf, fs); + } + + @Override + protected void initClient(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf) { + hoodieHiveClient = new AWSGlueCatalogSyncClient(hiveSyncConfig, hiveConf, fs); + } + + public static void main(String[] args) { + // parse the params + final HiveSyncConfig cfg = new HiveSyncConfig(); + JCommander cmd = new JCommander(cfg, null, args); + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + FileSystem fs = FSUtils.getFs(cfg.basePath, new Configuration()); + HiveConf hiveConf = new HiveConf(); + hiveConf.addResource(fs.getConf()); + new AwsGlueCatalogSyncTool(cfg, hiveConf, fs).syncHoodieTable(); + } +} diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/HoodieGlueSyncException.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/HoodieGlueSyncException.java new file mode 100644 index 0000000000000..5b788ebf317ee --- /dev/null +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/HoodieGlueSyncException.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.aws.sync; + +import org.apache.hudi.hive.HoodieHiveSyncException; + +public class HoodieGlueSyncException extends HoodieHiveSyncException { + + public HoodieGlueSyncException(String message) { + super(message); + } + + public HoodieGlueSyncException(String message, Throwable t) { + super(message, t); + } +} diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/transaction/lock/DynamoDBBasedLockProvider.java b/hudi-aws/src/main/java/org/apache/hudi/aws/transaction/lock/DynamoDBBasedLockProvider.java index 57348bea426ce..1d72f71844a49 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/aws/transaction/lock/DynamoDBBasedLockProvider.java +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/transaction/lock/DynamoDBBasedLockProvider.java @@ -156,9 +156,9 @@ public LockItem getLock() { private AmazonDynamoDB getDynamoDBClient() { String region = this.lockConfiguration.getConfig().getString(DynamoDbBasedLockConfig.DYNAMODB_LOCK_REGION.key()); - String endpointURL = this.lockConfiguration.getConfig().getString(DynamoDbBasedLockConfig.DYNAMODB_ENDPOINT_URL.key()) == null - ? RegionUtils.getRegion(region).getServiceEndpoint(AmazonDynamoDB.ENDPOINT_PREFIX) - : this.lockConfiguration.getConfig().getString(DynamoDbBasedLockConfig.DYNAMODB_ENDPOINT_URL.key()); + String endpointURL = this.lockConfiguration.getConfig().containsKey(DynamoDbBasedLockConfig.DYNAMODB_ENDPOINT_URL.key()) + ? 
this.lockConfiguration.getConfig().getString(DynamoDbBasedLockConfig.DYNAMODB_ENDPOINT_URL.key()) + : RegionUtils.getRegion(region).getServiceEndpoint(AmazonDynamoDB.ENDPOINT_PREFIX); AwsClientBuilder.EndpointConfiguration dynamodbEndpoint = new AwsClientBuilder.EndpointConfiguration(endpointURL, region); return AmazonDynamoDBClientBuilder.standard() diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/utils/S3Utils.java b/hudi-aws/src/main/java/org/apache/hudi/aws/utils/S3Utils.java new file mode 100644 index 0000000000000..bfb208ee15058 --- /dev/null +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/utils/S3Utils.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.aws.utils; + +public final class S3Utils { + + public static String s3aToS3(String s3aUrl) { + return s3aUrl.replaceFirst("(?i)^s3a://", "s3://"); + } +} diff --git a/hudi-aws/src/main/java/org/apache/hudi/config/DynamoDbBasedLockConfig.java b/hudi-aws/src/main/java/org/apache/hudi/config/DynamoDbBasedLockConfig.java index 19c63ea786559..1894b8641c1be 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/config/DynamoDbBasedLockConfig.java +++ b/hudi-aws/src/main/java/org/apache/hudi/config/DynamoDbBasedLockConfig.java @@ -103,8 +103,8 @@ public class DynamoDbBasedLockConfig extends HoodieConfig { public static final ConfigProperty DYNAMODB_ENDPOINT_URL = ConfigProperty .key(DYNAMODB_BASED_LOCK_PROPERTY_PREFIX + "endpoint_url") - .defaultValue("us-east-1") - .sinceVersion("0.11.0") + .noDefaultValue() + .sinceVersion("0.10.1") .withDocumentation("For DynamoDB based lock provider, the url endpoint used for Amazon DynamoDB service." + " Useful for development with a local dynamodb instance."); } diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index 29bdf85ab08c5..5c68ef7416449 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 @@ -225,10 +225,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - org.apache.spark - spark-avro_${scala.binary.version} - org.springframework.shell diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java index e317d5a4f5364..a4a8e46dfd2e0 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java @@ -83,6 +83,8 @@ public class HoodieTableHeaderFields { public static final String HEADER_HOODIE_PROPERTY = "Property"; public static final String HEADER_OLD_VALUE = "Old Value"; public static final String HEADER_NEW_VALUE = 
"New Value"; + public static final String HEADER_TEXT_METAFILE_PRESENT = "Text Metafile present ?"; + public static final String HEADER_BASE_METAFILE_PRESENT = "Base Metafile present ?"; /** * Fields of Savepoints. diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java index 1747a59f4f366..fcb273f0a73bd 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java @@ -81,7 +81,7 @@ public String showArchivedCommits( // read the avro blocks while (reader.hasNext()) { HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - blk.getRecordItr().forEachRemaining(readRecords::add); + blk.getRecordIterator().forEachRemaining(readRecords::add); } List readCommits = readRecords.stream().map(r -> (GenericRecord) r) .filter(r -> r.get("actionType").toString().equals(HoodieTimeline.COMMIT_ACTION) @@ -155,7 +155,7 @@ public String showCommits( // read the avro blocks while (reader.hasNext()) { HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - try (ClosableIterator recordItr = blk.getRecordItr()) { + try (ClosableIterator recordItr = blk.getRecordIterator()) { recordItr.forEachRemaining(readRecords::add); } } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java index 1d8d6dcd6ae93..fa6e15b7af696 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java @@ -124,7 +124,7 @@ private int copyArchivedInstants(List statuses, Set actionSe // read the avro blocks while (reader.hasNext() && copyCount < limit) { HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - try (ClosableIterator recordItr = blk.getRecordItr()) { 
+ try (ClosableIterator recordItr = blk.getRecordIterator()) { while (recordItr.hasNext()) { IndexedRecord ir = recordItr.next(); // Archived instants are saved as arvo encoded HoodieArchivedMetaEntry records. We need to get the diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java index 4a56858f3926a..b1c5531a22fd0 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java @@ -67,6 +67,9 @@ import scala.Tuple2; import scala.Tuple3; +import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; + /** * CLI command to display log file options. */ @@ -122,7 +125,7 @@ public String showLogFileCommits( instantTime = "dummy_instant_time_" + dummyInstantTimeCount; } if (n instanceof HoodieDataBlock) { - try (ClosableIterator recordItr = ((HoodieDataBlock) n).getRecordItr()) { + try (ClosableIterator recordItr = ((HoodieDataBlock) n).getRecordIterator()) { recordItr.forEachRemaining(r -> recordCount.incrementAndGet()); } } @@ -185,7 +188,7 @@ public String showLogFileRecords( .collect(Collectors.toList()); // logFilePaths size must > 1 - assert logFilePaths.size() > 0 : "There is no log file"; + checkArgument(logFilePaths.size() > 0, "There is no log file"); // TODO : readerSchema can change across blocks/log files, fix this inside Scanner AvroSchemaConverter converter = new AvroSchemaConverter(); @@ -218,6 +221,7 @@ public String showLogFileRecords( .withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH.defaultValue()) .withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue()) .withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue()) + 
.withPartition(getRelativePartitionPath(new Path(client.getBasePath()), new Path(logFilePaths.get(0)).getParent())) .build(); for (HoodieRecord hoodieRecord : scanner) { Option record = hoodieRecord.getData().getInsertValue(readerSchema); @@ -236,7 +240,7 @@ public String showLogFileRecords( HoodieLogBlock n = reader.next(); if (n instanceof HoodieDataBlock) { HoodieDataBlock blk = (HoodieDataBlock) n; - try (ClosableIterator recordItr = blk.getRecordItr()) { + try (ClosableIterator recordItr = blk.getRecordIterator()) { recordItr.forEachRemaining(record -> { if (allRecords.size() < limit) { allRecords.add(record); diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java index 6c068c898b9be..ac1701915773c 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java @@ -24,6 +24,7 @@ import org.apache.hudi.cli.HoodieTableHeaderFields; import org.apache.hudi.cli.utils.InputStreamConsumer; import org.apache.hudi.cli.utils.SparkUtil; +import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.table.HoodieTableConfig; @@ -31,11 +32,13 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.CleanerUtils; +import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieIOException; import org.apache.avro.AvroRuntimeException; import org.apache.hadoop.fs.Path; import org.apache.hudi.common.util.StringUtils; + import org.apache.log4j.Logger; import org.apache.spark.launcher.SparkLauncher; import org.apache.spark.util.Utils; @@ -133,7 +136,8 @@ public String addPartitionMeta( row[1] = "No"; if (!dryRun) { 
HoodiePartitionMetadata partitionMetadata = - new HoodiePartitionMetadata(HoodieCLI.fs, latestCommit, basePath, partitionPath); + new HoodiePartitionMetadata(HoodieCLI.fs, latestCommit, basePath, partitionPath, + client.getTableConfig().getPartitionMetafileFormat()); partitionMetadata.trySave(0); row[2] = "Repaired"; } @@ -199,4 +203,64 @@ public void removeCorruptedPendingCleanAction() { } }); } + + @CliCommand(value = "repair migrate-partition-meta", help = "Migrate all partition meta file currently stored in text format " + + "to be stored in base file format. See HoodieTableConfig#PARTITION_METAFILE_USE_DATA_FORMAT.") + public String migratePartitionMeta( + @CliOption(key = {"dryrun"}, help = "dry run without modifying anything.", unspecifiedDefaultValue = "true") final boolean dryRun) + throws IOException { + + HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(HoodieCLI.conf); + HoodieTableMetaClient client = HoodieCLI.getTableMetaClient(); + List partitionPaths = FSUtils.getAllPartitionPaths(engineContext, client.getBasePath(), false, false); + Path basePath = new Path(client.getBasePath()); + + String[][] rows = new String[partitionPaths.size()][]; + int ind = 0; + for (String partitionPath : partitionPaths) { + Path partition = FSUtils.getPartitionPath(client.getBasePath(), partitionPath); + Option textFormatFile = HoodiePartitionMetadata.textFormatMetaPathIfExists(HoodieCLI.fs, partition); + Option baseFormatFile = HoodiePartitionMetadata.baseFormatMetaPathIfExists(HoodieCLI.fs, partition); + String latestCommit = client.getActiveTimeline().getCommitTimeline().lastInstant().get().getTimestamp(); + + String[] row = new String[] { + partitionPath, + String.valueOf(textFormatFile.isPresent()), + String.valueOf(baseFormatFile.isPresent()), + textFormatFile.isPresent() ? 
"MIGRATE" : "NONE" + }; + + if (!dryRun) { + if (!baseFormatFile.isPresent()) { + HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(HoodieCLI.fs, latestCommit, basePath, partition, + Option.of(client.getTableConfig().getBaseFileFormat())); + partitionMetadata.trySave(0); + } + + // delete it, in case we failed midway last time. + textFormatFile.ifPresent(path -> { + try { + HoodieCLI.fs.delete(path, false); + } catch (IOException e) { + throw new HoodieIOException(e.getMessage(), e); + } + }); + + row[3] = "MIGRATED"; + } + + rows[ind++] = row; + } + + Properties props = new Properties(); + props.setProperty(HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.key(), "true"); + HoodieTableConfig.update(HoodieCLI.fs, new Path(client.getMetaPath()), props); + + return HoodiePrintHelper.print(new String[] { + HoodieTableHeaderFields.HEADER_PARTITION_PATH, + HoodieTableHeaderFields.HEADER_TEXT_METAFILE_PRESENT, + HoodieTableHeaderFields.HEADER_BASE_METAFILE_PRESENT, + HoodieTableHeaderFields.HEADER_ACTION + }, rows); + } } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SavepointsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SavepointsCommand.java index 8b6d47b9a9aaf..e5b07fd99f403 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SavepointsCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SavepointsCommand.java @@ -93,6 +93,8 @@ public String rollbackToSavepoint( @CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String instantTime, @CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master, + @CliOption(key = {"lazyFailedWritesCleanPolicy"}, help = "True if FailedWriteCleanPolicy is lazy", + unspecifiedDefaultValue = "false") final String lazyFailedWritesCleanPolicy, @CliOption(key = "sparkMemory", 
unspecifiedDefaultValue = "4G", help = "Spark executor memory") final String sparkMemory) throws Exception { @@ -110,7 +112,7 @@ public String rollbackToSavepoint( SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), master, sparkMemory, - instantTime, metaClient.getBasePath()); + instantTime, metaClient.getBasePath(), lazyFailedWritesCleanPolicy); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java index 0de1a1adfe0be..323c7bb5c36e0 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java @@ -18,7 +18,6 @@ package org.apache.hudi.cli.commands; -import org.apache.hadoop.fs.Path; import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.cli.DeDupeType; import org.apache.hudi.cli.DedupeSparkJob; @@ -28,6 +27,7 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableVersion; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; @@ -35,6 +35,7 @@ import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieBootstrapConfig; +import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieSavepointException; @@ -52,9 +53,10 @@ import 
org.apache.hudi.utilities.HoodieCompactionAdminTool; import org.apache.hudi.utilities.HoodieCompactionAdminTool.Operation; import org.apache.hudi.utilities.HoodieCompactor; -import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.deltastreamer.BootstrapExecutor; import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer; + +import org.apache.hadoop.fs.Path; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SQLContext; @@ -65,6 +67,12 @@ import java.util.List; import java.util.Locale; +import static org.apache.hudi.utilities.UtilHelpers.EXECUTE; +import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE; +import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE_AND_EXECUTE; +import static org.apache.hudi.utilities.UtilHelpers.buildProperties; +import static org.apache.hudi.utilities.UtilHelpers.readConfig; + /** * This class deals with initializing spark context based on command entered to hudi-cli. 
*/ @@ -102,8 +110,8 @@ public static void main(String[] args) throws Exception { returnCode = deduplicatePartitionPath(jsc, args[3], args[4], args[5], Boolean.parseBoolean(args[6]), args[7]); break; case ROLLBACK_TO_SAVEPOINT: - assert (args.length == 5); - returnCode = rollbackToSavepoint(jsc, args[3], args[4]); + assert (args.length == 6); + returnCode = rollbackToSavepoint(jsc, args[3], args[4], Boolean.parseBoolean(args[5])); break; case IMPORT: case UPSERT: @@ -192,7 +200,7 @@ public static void main(String[] args) throws Exception { configs.addAll(Arrays.asList(args).subList(9, args.length)); } returnCode = cluster(jsc, args[3], args[4], args[5], Integer.parseInt(args[6]), args[2], - Integer.parseInt(args[7]), HoodieClusteringJob.EXECUTE, propsFilePath, configs); + Integer.parseInt(args[7]), EXECUTE, propsFilePath, configs); break; case CLUSTERING_SCHEDULE_AND_EXECUTE: assert (args.length >= 8); @@ -205,7 +213,7 @@ public static void main(String[] args) throws Exception { configs.addAll(Arrays.asList(args).subList(8, args.length)); } returnCode = cluster(jsc, args[3], args[4], null, Integer.parseInt(args[5]), args[2], - Integer.parseInt(args[6]), HoodieClusteringJob.SCHEDULE_AND_EXECUTE, propsFilePath, configs); + Integer.parseInt(args[6]), SCHEDULE_AND_EXECUTE, propsFilePath, configs); break; case CLUSTERING_SCHEDULE: assert (args.length >= 7); @@ -218,7 +226,7 @@ public static void main(String[] args) throws Exception { configs.addAll(Arrays.asList(args).subList(7, args.length)); } returnCode = cluster(jsc, args[3], args[4], args[5], 1, args[2], - 0, HoodieClusteringJob.SCHEDULE, propsFilePath, configs); + 0, SCHEDULE, propsFilePath, configs); break; case CLEAN: assert (args.length >= 5); @@ -285,7 +293,7 @@ protected static void clean(JavaSparkContext jsc, String basePath, String propsF protected static int deleteMarker(JavaSparkContext jsc, String instantTime, String basePath) { try { - SparkRDDWriteClient client = createHoodieClient(jsc, basePath); + 
SparkRDDWriteClient client = createHoodieClient(jsc, basePath, false); HoodieWriteConfig config = client.getConfig(); HoodieEngineContext context = client.getEngineContext(); HoodieSparkTable table = HoodieSparkTable.create(config, context, true); @@ -411,8 +419,8 @@ private static int doBootstrap(JavaSparkContext jsc, String tableName, String ta String bootstrapIndexClass, String selectorClass, String keyGenerator, String fullBootstrapInputProvider, String payloadClassName, String enableHiveSync, String propsFilePath, List configs) throws IOException { - TypedProperties properties = propsFilePath == null ? UtilHelpers.buildProperties(configs) - : UtilHelpers.readConfig(jsc.hadoopConfiguration(), new Path(propsFilePath), configs).getProps(true); + TypedProperties properties = propsFilePath == null ? buildProperties(configs) + : readConfig(jsc.hadoopConfiguration(), new Path(propsFilePath), configs).getProps(true); properties.setProperty(HoodieBootstrapConfig.BASE_PATH.key(), sourcePath); @@ -455,7 +463,7 @@ private static int rollback(JavaSparkContext jsc, String instantTime, String bas private static int createSavepoint(JavaSparkContext jsc, String commitTime, String user, String comments, String basePath) throws Exception { - SparkRDDWriteClient client = createHoodieClient(jsc, basePath); + SparkRDDWriteClient client = createHoodieClient(jsc, basePath, false); try { client.savepoint(commitTime, user, comments); LOG.info(String.format("The commit \"%s\" has been savepointed.", commitTime)); @@ -466,8 +474,8 @@ private static int createSavepoint(JavaSparkContext jsc, String commitTime, Stri } } - private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime, String basePath) throws Exception { - SparkRDDWriteClient client = createHoodieClient(jsc, basePath); + private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime, String basePath, boolean lazyCleanPolicy) throws Exception { + SparkRDDWriteClient client = 
createHoodieClient(jsc, basePath, lazyCleanPolicy); try { client.restoreToSavepoint(savepointTime); LOG.info(String.format("The commit \"%s\" rolled back.", savepointTime)); @@ -479,7 +487,7 @@ private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTim } private static int deleteSavepoint(JavaSparkContext jsc, String savepointTime, String basePath) throws Exception { - SparkRDDWriteClient client = createHoodieClient(jsc, basePath); + SparkRDDWriteClient client = createHoodieClient(jsc, basePath, false); try { client.deleteSavepoint(savepointTime); LOG.info(String.format("Savepoint \"%s\" deleted.", savepointTime)); @@ -500,7 +508,8 @@ private static int deleteSavepoint(JavaSparkContext jsc, String savepointTime, S * @throws Exception */ protected static int upgradeOrDowngradeTable(JavaSparkContext jsc, String basePath, String toVersion) { - HoodieWriteConfig config = getWriteConfig(basePath, Boolean.parseBoolean(HoodieWriteConfig.ROLLBACK_USING_MARKERS_ENABLE.defaultValue())); + HoodieWriteConfig config = getWriteConfig(basePath, Boolean.parseBoolean(HoodieWriteConfig.ROLLBACK_USING_MARKERS_ENABLE.defaultValue()), + false); HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(config.getBasePath()) .setLoadActiveTimelineOnLoad(false).setConsistencyGuardConfig(config.getConsistencyGuardConfig()) @@ -517,18 +526,20 @@ protected static int upgradeOrDowngradeTable(JavaSparkContext jsc, String basePa } } - private static SparkRDDWriteClient createHoodieClient(JavaSparkContext jsc, String basePath, Boolean rollbackUsingMarkers) throws Exception { - HoodieWriteConfig config = getWriteConfig(basePath, rollbackUsingMarkers); + private static SparkRDDWriteClient createHoodieClient(JavaSparkContext jsc, String basePath, Boolean rollbackUsingMarkers, boolean lazyCleanPolicy) throws Exception { + HoodieWriteConfig config = getWriteConfig(basePath, rollbackUsingMarkers, lazyCleanPolicy); return new 
SparkRDDWriteClient(new HoodieSparkEngineContext(jsc), config); } - private static SparkRDDWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception { - return createHoodieClient(jsc, basePath, Boolean.parseBoolean(HoodieWriteConfig.ROLLBACK_USING_MARKERS_ENABLE.defaultValue())); + private static SparkRDDWriteClient createHoodieClient(JavaSparkContext jsc, String basePath, boolean lazyCleanPolicy) throws Exception { + return createHoodieClient(jsc, basePath, Boolean.parseBoolean(HoodieWriteConfig.ROLLBACK_USING_MARKERS_ENABLE.defaultValue()), lazyCleanPolicy); } - private static HoodieWriteConfig getWriteConfig(String basePath, Boolean rollbackUsingMarkers) { + private static HoodieWriteConfig getWriteConfig(String basePath, Boolean rollbackUsingMarkers, boolean lazyCleanPolicy) { return HoodieWriteConfig.newBuilder().withPath(basePath) .withRollbackUsingMarkers(rollbackUsingMarkers) + .withCompactionConfig(HoodieCompactionConfig.newBuilder().withFailedWritesCleaningPolicy(lazyCleanPolicy ? 
HoodieFailedWritesCleaningPolicy.LAZY : + HoodieFailedWritesCleaningPolicy.EAGER).build()) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build(); } } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java index c475c633f58a6..cac4f1341b458 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java @@ -27,6 +27,7 @@ import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCleaningPolicy; +import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; @@ -35,6 +36,7 @@ import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.util.Option; import org.apache.hadoop.conf.Configuration; @@ -47,8 +49,10 @@ import java.io.IOException; import java.net.URL; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.UUID; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -94,8 +98,11 @@ public void init() throws Exception { // Inflight Compaction HoodieTestCommitMetadataGenerator.createCompactionAuxiliaryMetadata(tablePath, new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, timestamp), conf); + + Map extraCommitMetadata = + 
Collections.singletonMap(HoodieCommitMetadata.SCHEMA_KEY, HoodieTestTable.PHONY_TABLE_SCHEMA); HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, timestamp, conf, fileId1, fileId2, - Option.empty(), Option.empty()); + Option.empty(), Option.empty(), extraCommitMetadata); } metaClient = HoodieTableMetaClient.reload(metaClient); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java index ee7fbda11b783..621061ae71122 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java @@ -65,6 +65,7 @@ import java.util.Map; import java.util.stream.Collectors; +import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -203,6 +204,7 @@ public void testShowLogFileRecordsWithMerge() throws IOException, InterruptedExc // get expected result of 10 records. 
List logFilePaths = Arrays.stream(fs.globStatus(new Path(partitionPath + "/*"))) .map(status -> status.getPath().toString()).collect(Collectors.toList()); + assertTrue(logFilePaths.size() > 0); HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder() .withFileSystem(fs) .withBasePath(tablePath) @@ -221,6 +223,7 @@ public void testShowLogFileRecordsWithMerge() throws IOException, InterruptedExc .withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH.defaultValue()) .withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue()) .withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue()) + .withPartition(getRelativePartitionPath(new Path(tablePath), new Path(logFilePaths.get(0)).getParent())) .build(); Iterator> records = scanner.iterator(); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java index 27cc31ccea2cf..96e0873da5091 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java @@ -51,6 +51,7 @@ import java.util.stream.Collectors; import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER; +import static org.apache.hudi.common.table.HoodieTableConfig.DROP_PARTITION_COLUMNS; import static org.apache.hudi.common.table.HoodieTableConfig.NAME; import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_CHECKSUM; import static org.apache.hudi.common.table.HoodieTableConfig.TIMELINE_LAYOUT_VERSION; @@ -188,11 +189,12 @@ public void testOverwriteHoodieProperties() throws IOException { Map expected = expectProps.entrySet().stream() .collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue()))); expected.putIfAbsent(TABLE_CHECKSUM.key(), 
String.valueOf(generateChecksum(tableConfig.getProps()))); + expected.putIfAbsent(DROP_PARTITION_COLUMNS.key(), String.valueOf(DROP_PARTITION_COLUMNS.defaultValue())); assertEquals(expected, result); // check result List allPropsStr = Arrays.asList(NAME.key(), TYPE.key(), VERSION.key(), - ARCHIVELOG_FOLDER.key(), TIMELINE_LAYOUT_VERSION.key(), TABLE_CHECKSUM.key()); + ARCHIVELOG_FOLDER.key(), TIMELINE_LAYOUT_VERSION.key(), TABLE_CHECKSUM.key(), DROP_PARTITION_COLUMNS.key()); String[][] rows = allPropsStr.stream().sorted().map(key -> new String[] {key, oldProps.getOrDefault(key, "null"), result.getOrDefault(key, "null")}) .toArray(String[][]::new); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java index 9a10893b35e89..cf4faf2e16488 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java @@ -26,17 +26,19 @@ import org.apache.hudi.cli.functional.CLIFunctionalTestHarness; import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.SparkRDDWriteClient; +import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; -import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.common.testutils.HoodieMetadataTestTable; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.index.HoodieIndex; +import 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; @@ -80,7 +82,19 @@ public void init() throws Exception { put(DEFAULT_THIRD_PARTITION_PATH, "file-3"); } }; - HoodieTestTable.of(metaClient) + + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(tablePath) + .withMetadataConfig( + // Column Stats Index is disabled, since these tests construct tables which are + // not valid (empty commit metadata, etc) + HoodieMetadataConfig.newBuilder() + .withMetadataIndexColumnStats(false) + .build() + ) + .withRollbackUsingMarkers(false) + .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build(); + HoodieMetadataTestTable.of(metaClient, SparkHoodieBackedTableMetadataWriter.create( + metaClient.getHadoopConf(), config, context)) .withPartitionMetaFiles(DEFAULT_PARTITION_PATHS) .addCommit("100") .withBaseFilesInPartitions(partitionAndFileId) @@ -88,11 +102,8 @@ public void init() throws Exception { .withBaseFilesInPartitions(partitionAndFileId) .addInflightCommit("102") .withBaseFilesInPartitions(partitionAndFileId); - // generate two rollback - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(tablePath) - .withRollbackUsingMarkers(false) - .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build(); + // generate two rollback try (BaseHoodieWriteClient client = new SparkRDDWriteClient(context(), config)) { // Rollback inflight commit3 and commit2 client.rollback("102"); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java index 5f8021ab5d7d2..7de1c2d014260 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java @@ -22,6 
+22,8 @@ import org.apache.hudi.cli.HoodieCLI; import org.apache.hudi.cli.commands.TableCommand; import org.apache.hudi.cli.testutils.AbstractShellIntegrationTest; +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -29,6 +31,9 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.springframework.shell.core.CommandResult; @@ -118,6 +123,54 @@ public void testRollbackToSavepoint() throws IOException { new HoodieInstant(HoodieInstant.State.COMPLETED, "commit", "103"))); } + /** + * Test case of command 'savepoint rollback' with metadata table bootstrap. 
+ */ + @Test + public void testRollbackToSavepointWithMetadataTableEnable() throws IOException { + // generate for savepoints + for (int i = 101; i < 105; i++) { + String instantTime = String.valueOf(i); + HoodieTestDataGenerator.createCommitFile(tablePath, instantTime, jsc.hadoopConfiguration()); + } + + // generate one savepoint at 102 + String savepoint = "102"; + HoodieTestDataGenerator.createSavepointFile(tablePath, savepoint, jsc.hadoopConfiguration()); + + // re-bootstrap metadata table + // delete first + String basePath = metaClient.getBasePath(); + Path metadataTableBasePath = new Path(HoodieTableMetadata.getMetadataTableBasePath(basePath)); + metaClient.getFs().delete(metadataTableBasePath, true); + + // then bootstrap metadata table at instant 104 + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath(HoodieCLI.basePath) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()).build(); + SparkHoodieBackedTableMetadataWriter.create(HoodieCLI.conf, writeConfig, new HoodieSparkEngineContext(jsc)); + + assertTrue(HoodieCLI.fs.exists(metadataTableBasePath)); + + // roll back to savepoint + CommandResult cr = getShell().executeCommand( + String.format("savepoint rollback --savepoint %s --sparkMaster %s", savepoint, "local")); + + assertAll("Command run failed", + () -> assertTrue(cr.isSuccess()), + () -> assertEquals( + String.format("Savepoint \"%s\" rolled back", savepoint), cr.getResult().toString())); + + // there is 1 restore instant + HoodieActiveTimeline timeline = HoodieCLI.getTableMetaClient().getActiveTimeline(); + assertEquals(1, timeline.getRestoreTimeline().countInstants()); + + // 103 and 104 instant had rollback + assertFalse(timeline.getCommitTimeline().containsInstant( + new HoodieInstant(HoodieInstant.State.COMPLETED, "commit", "103"))); + assertFalse(timeline.getCommitTimeline().containsInstant( + new HoodieInstant(HoodieInstant.State.COMPLETED, "commit", "104"))); + } + /** * Test case of command 
'savepoint delete'. */ diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml index a55a136652728..a348a63921179 100644 --- a/hudi-client/hudi-client-common/pom.xml +++ b/hudi-client/hudi-client-common/pom.xml @@ -19,12 +19,12 @@ hudi-client org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-client-common - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-client-common jar diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncClusteringService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncClusteringService.java index 1c1cf2bb9f74b..7fece5c885f8a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncClusteringService.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncClusteringService.java @@ -21,6 +21,8 @@ import org.apache.hudi.client.BaseClusterer; import org.apache.hudi.client.BaseHoodieWriteClient; +import org.apache.hudi.common.engine.EngineProperty; +import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; @@ -42,18 +44,21 @@ public abstract class AsyncClusteringService extends HoodieAsyncTableService { private static final long serialVersionUID = 1L; private static final Logger LOG = LogManager.getLogger(AsyncClusteringService.class); + public static final String CLUSTERING_POOL_NAME = "hoodiecluster"; private final int maxConcurrentClustering; private transient BaseClusterer clusteringClient; + protected transient HoodieEngineContext context; - public AsyncClusteringService(BaseHoodieWriteClient writeClient) { - this(writeClient, false); + public AsyncClusteringService(HoodieEngineContext context, BaseHoodieWriteClient writeClient) { + this(context, writeClient, false); } - public AsyncClusteringService(BaseHoodieWriteClient 
writeClient, boolean runInDaemonMode) { + public AsyncClusteringService(HoodieEngineContext context, BaseHoodieWriteClient writeClient, boolean runInDaemonMode) { super(writeClient.getConfig(), runInDaemonMode); this.clusteringClient = createClusteringClient(writeClient); this.maxConcurrentClustering = 1; + this.context = context; } protected abstract BaseClusterer createClusteringClient(BaseHoodieWriteClient client); @@ -72,6 +77,9 @@ protected Pair startService() { return Pair.of(CompletableFuture.allOf(IntStream.range(0, maxConcurrentClustering).mapToObj(i -> CompletableFuture.supplyAsync(() -> { try { + // Set Compactor Pool Name for allowing users to prioritize compaction + LOG.info("Setting pool name for clustering to " + CLUSTERING_POOL_NAME); + context.setProperty(EngineProperty.CLUSTERING_POOL_NAME, CLUSTERING_POOL_NAME); while (!isShutdownRequested()) { final HoodieInstant instant = fetchNextAsyncServiceInstant(); if (null != instant) { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java index a6a7e18b1f6ab..32a8dee517389 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java @@ -20,10 +20,13 @@ import org.apache.hudi.async.AsyncArchiveService; import org.apache.hudi.async.AsyncCleanerService; +import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; +import org.apache.hudi.avro.model.HoodieIndexCommitMetadata; +import org.apache.hudi.avro.model.HoodieIndexPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; import 
org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; @@ -46,6 +49,7 @@ import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableVersion; +import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieInstant.State; @@ -62,11 +66,23 @@ import org.apache.hudi.exception.HoodieCommitException; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.exception.HoodieIndexException; import org.apache.hudi.exception.HoodieRestoreException; import org.apache.hudi.exception.HoodieRollbackException; import org.apache.hudi.exception.HoodieSavepointException; import org.apache.hudi.index.HoodieIndex; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Type; +import org.apache.hudi.internal.schema.action.InternalSchemaChangeApplier; +import org.apache.hudi.internal.schema.action.TableChange; +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter; +import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager; +import org.apache.hudi.internal.schema.utils.AvroSchemaEvolutionUtils; +import org.apache.hudi.internal.schema.utils.InternalSchemaUtils; +import org.apache.hudi.internal.schema.utils.SerDeHelper; +import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; +import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.metrics.HoodieMetrics; import org.apache.hudi.table.BulkInsertPartitioner; import org.apache.hudi.table.HoodieTable; @@ -78,7 +94,9 @@ import org.apache.hudi.table.upgrade.UpgradeDowngrade; import 
com.codahale.metrics.Timer; +import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -91,9 +109,12 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.model.HoodieCommitMetadata.SCHEMA_KEY; + /** * Abstract Write Client providing functionality for performing commit, index updates and rollback * Reused for regular write operations like upsert/insert/bulk-insert.. as well as bootstrap @@ -124,6 +145,7 @@ public abstract class BaseHoodieWriteClient>> lastCompletedTxnAndMetadata = Option.empty(); + protected Set pendingInflightAndRequestedInstants; /** * Create a write client, with new hudi index. @@ -210,7 +232,8 @@ public boolean commitStats(String instantTime, List stats, Opti try { preCommit(inflightInstant, metadata); commit(table, commitActionType, instantTime, metadata, stats); - postCommit(table, metadata, instantTime, extraMetadata); + // already within lock, and so no lock requried for archival + postCommit(table, metadata, instantTime, extraMetadata, false); LOG.info("Committed " + instantTime); releaseResources(); } catch (IOException e) { @@ -237,12 +260,42 @@ protected void commit(HoodieTable table, String commitActionType, String instant HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); // Finalize write finalizeWrite(table, instantTime, stats); + // do save internal schema to support Implicitly add columns in write process + if (!metadata.getExtraMetadata().containsKey(SerDeHelper.LATEST_SCHEMA) + && metadata.getExtraMetadata().containsKey(SCHEMA_KEY) && table.getConfig().getSchemaEvolutionEnable()) { + saveInternalSchema(table, instantTime, metadata); + } // update Metadata table writeTableMetadata(table, instantTime, commitActionType, metadata); 
activeTimeline.saveAsComplete(new HoodieInstant(true, commitActionType, instantTime), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); } + // Save internal schema + private void saveInternalSchema(HoodieTable table, String instantTime, HoodieCommitMetadata metadata) { + TableSchemaResolver schemaUtil = new TableSchemaResolver(table.getMetaClient()); + String historySchemaStr = schemaUtil.getTableHistorySchemaStrFromCommitMetadata().orElse(""); + FileBasedInternalSchemaStorageManager schemasManager = new FileBasedInternalSchemaStorageManager(table.getMetaClient()); + if (!historySchemaStr.isEmpty()) { + InternalSchema internalSchema = InternalSchemaUtils.searchSchema(Long.parseLong(instantTime), + SerDeHelper.parseSchemas(historySchemaStr)); + Schema avroSchema = HoodieAvroUtils.createHoodieWriteSchema(new Schema.Parser().parse(config.getSchema())); + InternalSchema evolvedSchema = AvroSchemaEvolutionUtils.evolveSchemaFromNewAvroSchema(avroSchema, internalSchema); + if (evolvedSchema.equals(internalSchema)) { + metadata.addMetadata(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(evolvedSchema)); + //TODO save history schema by metaTable + schemasManager.persistHistorySchemaStr(instantTime, historySchemaStr); + } else { + evolvedSchema.setSchemaId(Long.parseLong(instantTime)); + String newSchemaStr = SerDeHelper.toJson(evolvedSchema); + metadata.addMetadata(SerDeHelper.LATEST_SCHEMA, newSchemaStr); + schemasManager.persistHistorySchemaStr(instantTime, SerDeHelper.inheritSchemas(evolvedSchema, historySchemaStr)); + } + // update SCHEMA_KEY + metadata.addMetadata(SCHEMA_KEY, AvroInternalSchemaConverter.convert(evolvedSchema, avroSchema.getName()).toString()); + } + } + protected HoodieTable createTable(HoodieWriteConfig config, Configuration hadoopConf) { return createTable(config, hadoopConf, false); } @@ -400,7 +453,6 @@ protected void rollbackFailedBootstrap() { public abstract O bulkInsert(I records, final String instantTime, Option 
userDefinedBulkInsertPartitioner); - /** * Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie * table for the very first time (e.g: converting an existing table to Hoodie). The input records should contain no @@ -440,6 +492,8 @@ protected void preWrite(String instantTime, WriteOperationType writeOperationTyp HoodieTableMetaClient metaClient) { setOperationType(writeOperationType); this.lastCompletedTxnAndMetadata = TransactionUtils.getLastCompletedTxnInstantAndMetadata(metaClient); + this.pendingInflightAndRequestedInstants = TransactionUtils.getInflightAndRequestedInstants(metaClient); + this.pendingInflightAndRequestedInstants.remove(instantTime); if (null == this.asyncCleanerService) { this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this); } else { @@ -468,14 +522,16 @@ protected void preWrite(String instantTime, WriteOperationType writeOperationTyp * @param metadata Commit Metadata corresponding to committed instant * @param instantTime Instant Time * @param extraMetadata Additional Metadata passed by user + * @param acquireLockForArchival true if lock has to be acquired for archival. false otherwise. */ - protected void postCommit(HoodieTable table, HoodieCommitMetadata metadata, String instantTime, Option> extraMetadata) { + protected void postCommit(HoodieTable table, HoodieCommitMetadata metadata, String instantTime, Option> extraMetadata, + boolean acquireLockForArchival) { try { // Delete the marker directory for the instant. 
WriteMarkersFactory.get(config.getMarkersType(), table, instantTime) .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism()); autoCleanOnCommit(); - autoArchiveOnCommit(table); + autoArchiveOnCommit(table, acquireLockForArchival); } finally { this.heartbeatClient.stop(instantTime); } @@ -559,7 +615,7 @@ protected void autoCleanOnCommit() { } } - protected void autoArchiveOnCommit(HoodieTable table) { + protected void autoArchiveOnCommit(HoodieTable table, boolean acquireLockForArchival) { if (!config.isAutoArchive()) { return; } @@ -570,7 +626,7 @@ protected void autoArchiveOnCommit(HoodieTable table) { LOG.info("Async archiver has finished"); } else { LOG.info("Start to archive synchronously."); - archive(table); + archive(table, acquireLockForArchival); } } @@ -639,9 +695,30 @@ public void deleteSavepoint(String savepointTime) { * @return true if the savepoint was restored to successfully */ public void restoreToSavepoint(String savepointTime) { - HoodieTable table = initTable(WriteOperationType.UNKNOWN, Option.empty()); + boolean initialMetadataTableIfNecessary = config.isMetadataTableEnabled(); + if (initialMetadataTableIfNecessary) { + try { + // Delete metadata table directly when users trigger savepoint rollback if mdt existed and beforeTimelineStarts + String metadataTableBasePathStr = HoodieTableMetadata.getMetadataTableBasePath(config.getBasePath()); + HoodieTableMetaClient mdtClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataTableBasePathStr).build(); + // Same as HoodieTableMetadataUtil#processRollbackMetadata + HoodieInstant syncedInstant = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, savepointTime); + // The instant required to sync rollback to MDT has been archived and the mdt syncing will be failed + // So that we need to delete the whole MDT here. 
+ if (mdtClient.getCommitsTimeline().isBeforeTimelineStarts(syncedInstant.getTimestamp())) { + mdtClient.getFs().delete(new Path(metadataTableBasePathStr), true); + // rollbackToSavepoint action will try to bootstrap MDT at first but sync to MDT will fail at the current scenario. + // so that we need to disable metadata initialized here. + initialMetadataTableIfNecessary = false; + } + } catch (Exception e) { + // Metadata directory does not exist + } + } + + HoodieTable table = initTable(WriteOperationType.UNKNOWN, Option.empty(), initialMetadataTableIfNecessary); SavepointHelpers.validateSavepointPresence(table, savepointTime); - restoreToInstant(savepointTime); + restoreToInstant(savepointTime, initialMetadataTableIfNecessary); SavepointHelpers.validateSavepointRestore(table, savepointTime); } @@ -655,7 +732,7 @@ public boolean rollback(final String commitInstantTime) throws HoodieRollbackExc /** * @Deprecated * Rollback the inflight record changes with the given commit time. This - * will be removed in future in favor of {@link BaseHoodieWriteClient#restoreToInstant(String)} + * will be removed in future in favor of {@link BaseHoodieWriteClient#restoreToInstant(String, boolean) * * @param commitInstantTime Instant time of the commit * @param pendingRollbackInfo pending rollback instant and plan if rollback failed from previous attempt. 
@@ -672,14 +749,24 @@ public boolean rollback(final String commitInstantTime, Option commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants() .filter(instant -> HoodieActiveTimeline.EQUALS.test(instant.getTimestamp(), commitInstantTime)) .findFirst()); - if (commitInstantOpt.isPresent()) { - LOG.info("Scheduling Rollback at instant time :" + rollbackInstantTime); + if (commitInstantOpt.isPresent() || pendingRollbackInfo.isPresent()) { + LOG.info(String.format("Scheduling Rollback at instant time : %s " + + "(exists in active timeline: %s), with rollback plan: %s", + rollbackInstantTime, commitInstantOpt.isPresent(), pendingRollbackInfo.isPresent())); Option rollbackPlanOption = pendingRollbackInfo.map(entry -> Option.of(entry.getRollbackPlan())) .orElseGet(() -> table.scheduleRollback(context, rollbackInstantTime, commitInstantOpt.get(), false, config.shouldRollbackUsingMarkers())); if (rollbackPlanOption.isPresent()) { - // execute rollback - HoodieRollbackMetadata rollbackMetadata = table.rollback(context, rollbackInstantTime, commitInstantOpt.get(), true, - skipLocking); + // There can be a case where the inflight rollback failed after the instant files + // are deleted for commitInstantTime, so that commitInstantOpt is empty as it is + // not present in the timeline. In such a case, the hoodie instant instance + // is reconstructed to allow the rollback to be reattempted, and the deleteInstants + // is set to false since they are already deleted. + // Execute rollback + HoodieRollbackMetadata rollbackMetadata = commitInstantOpt.isPresent() + ? 
table.rollback(context, rollbackInstantTime, commitInstantOpt.get(), true, skipLocking) + : table.rollback(context, rollbackInstantTime, new HoodieInstant( + true, rollbackPlanOption.get().getInstantToRollback().getAction(), commitInstantTime), + false, skipLocking); if (timerContext != null) { long durationInMs = metrics.getDurationInMs(timerContext.stop()); metrics.updateRollbackMetrics(durationInMs, rollbackMetadata.getTotalFilesDeleted()); @@ -703,12 +790,12 @@ public boolean rollback(final String commitInstantTime, Option table = initTable(WriteOperationType.UNKNOWN, Option.empty()); + HoodieTable table = initTable(WriteOperationType.UNKNOWN, Option.empty(), initialMetadataTableIfNecessary); Option restorePlanOption = table.scheduleRestore(context, restoreInstantTime, instantTime); if (restorePlanOption.isPresent()) { HoodieRestoreMetadata restoreMetadata = table.restore(context, restoreInstantTime, instantTime); @@ -808,15 +895,16 @@ public HoodieCleanMetadata clean(boolean skipLocking) { * Trigger archival for the table. This ensures that the number of commits do not explode * and keep increasing unbounded over time. * @param table table to commit on. + * @param acquireLockForArchival true if lock has to be acquired for archival. false otherwise. */ - protected void archive(HoodieTable table) { + protected void archive(HoodieTable table, boolean acquireLockForArchival) { if (!tableServicesEnabled(config)) { return; } try { // We cannot have unbounded commit files. 
Archive commits if we have to archive HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(config, table); - archiver.archiveIfRequired(context); + archiver.archiveIfRequired(context, acquireLockForArchival); } catch (IOException ioe) { throw new HoodieIOException("Failed to archive", ioe); } @@ -829,7 +917,7 @@ protected void archive(HoodieTable table) { public void archive() { // Create a Hoodie table which encapsulated the commits and files visible HoodieTable table = createTable(config, hadoopConf); - archive(table); + archive(table, true); } /** @@ -915,6 +1003,53 @@ public boolean scheduleCompactionAtInstant(String instantTime, Option scheduleIndexing(List partitionTypes) { + String instantTime = HoodieActiveTimeline.createNewInstantTime(); + Option indexPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled()) + .scheduleIndexing(context, instantTime, partitionTypes); + return indexPlan.isPresent() ? Option.of(instantTime) : Option.empty(); + } + + /** + * Runs INDEX action to build out the metadata partitions as planned for the given instant time. + * + * @param indexInstantTime - instant time for the requested INDEX action + * @return {@link Option} after successful indexing. + */ + public Option index(String indexInstantTime) { + return createTable(config, hadoopConf, config.isMetadataTableEnabled()).index(context, indexInstantTime); + } + + /** + * Drops the index and removes the metadata partitions. 
+ * + * @param partitionTypes - list of {@link MetadataPartitionType} which needs to be indexed + */ + public void dropIndex(List partitionTypes) { + HoodieTable table = createTable(config, hadoopConf); + String dropInstant = HoodieActiveTimeline.createNewInstantTime(); + this.txnManager.beginTransaction(); + try { + context.setJobStatus(this.getClass().getSimpleName(), "Dropping partitions from metadata table"); + table.getMetadataWriter(dropInstant).ifPresent(w -> { + try { + ((HoodieTableMetadataWriter) w).dropMetadataPartitions(partitionTypes); + } catch (IOException e) { + throw new HoodieIndexException("Failed to drop metadata index. ", e); + } + }); + } finally { + this.txnManager.endTransaction(); + } + } + /** * Performs Compaction for the workload stored in instant-time. * @@ -978,9 +1113,28 @@ protected Map> getPendingRollbackInfos protected Map> getPendingRollbackInfos(HoodieTableMetaClient metaClient, boolean ignoreCompactionAndClusteringInstants) { List instants = metaClient.getActiveTimeline().filterPendingRollbackTimeline().getInstants().collect(Collectors.toList()); Map> infoMap = new HashMap<>(); - for (HoodieInstant instant : instants) { + for (HoodieInstant rollbackInstant : instants) { + HoodieRollbackPlan rollbackPlan; + try { + rollbackPlan = RollbackUtils.getRollbackPlan(metaClient, rollbackInstant); + } catch (IOException e) { + if (rollbackInstant.isRequested()) { + LOG.warn("Fetching rollback plan failed for " + rollbackInstant + ", deleting the plan since it's in REQUESTED state", e); + try { + metaClient.getActiveTimeline().deletePending(rollbackInstant); + } catch (HoodieIOException he) { + LOG.warn("Cannot delete " + rollbackInstant, he); + continue; + } + } else { + // Here we assume that if the rollback is inflight, the rollback plan is intact + // in instant.rollback.requested. The exception here can be due to other reasons. 
+ LOG.warn("Fetching rollback plan failed for " + rollbackInstant + ", skip the plan", e); + } + continue; + } + try { - HoodieRollbackPlan rollbackPlan = RollbackUtils.getRollbackPlan(metaClient, instant); String action = rollbackPlan.getInstantToRollback().getAction(); if (ignoreCompactionAndClusteringInstants) { if (!HoodieTimeline.COMPACTION_ACTION.equals(action)) { @@ -989,14 +1143,14 @@ protected Map> getPendingRollbackInfos rollbackPlan.getInstantToRollback().getCommitTime())).isPresent(); if (!isClustering) { String instantToRollback = rollbackPlan.getInstantToRollback().getCommitTime(); - infoMap.putIfAbsent(instantToRollback, Option.of(new HoodiePendingRollbackInfo(instant, rollbackPlan))); + infoMap.putIfAbsent(instantToRollback, Option.of(new HoodiePendingRollbackInfo(rollbackInstant, rollbackPlan))); } } } else { - infoMap.putIfAbsent(rollbackPlan.getInstantToRollback().getCommitTime(), Option.of(new HoodiePendingRollbackInfo(instant, rollbackPlan))); + infoMap.putIfAbsent(rollbackPlan.getInstantToRollback().getCommitTime(), Option.of(new HoodiePendingRollbackInfo(rollbackInstant, rollbackPlan))); } - } catch (IOException e) { - LOG.warn("Fetching rollback plan failed for " + infoMap + ", skip the plan", e); + } catch (Exception e) { + LOG.warn("Processing rollback plan failed for " + rollbackInstant + ", skip the plan", e); } } return infoMap; @@ -1274,14 +1428,14 @@ public HoodieMetrics getMetrics() { * @param instantTime current inflight instant time * @return instantiated {@link HoodieTable} */ - protected abstract HoodieTable doInitTable(HoodieTableMetaClient metaClient, Option instantTime); + protected abstract HoodieTable doInitTable(HoodieTableMetaClient metaClient, Option instantTime, boolean initialMetadataTableIfNecessary); /** * Instantiates and initializes instance of {@link HoodieTable}, performing crucial bootstrapping * operations such as: * * NOTE: This method is engine-agnostic and SHOULD NOT be overloaded, please check on - * {@link 
#doInitTable(HoodieTableMetaClient, Option)} instead + * {@link #doInitTable(HoodieTableMetaClient, Option, boolean)} instead * *
    *
  • Checking whether upgrade/downgrade is required
  • @@ -1289,7 +1443,7 @@ public HoodieMetrics getMetrics() { *
  • Initializing metrics contexts
  • *
*/ - protected final HoodieTable initTable(WriteOperationType operationType, Option instantTime) { + protected final HoodieTable initTable(WriteOperationType operationType, Option instantTime, boolean initialMetadataTableIfNecessary) { HoodieTableMetaClient metaClient = createMetaClient(true); // Setup write schemas for deletes if (operationType == WriteOperationType.DELETE) { @@ -1301,7 +1455,7 @@ protected final HoodieTable initTable(WriteOperationType operationType, Option instantTime) { + return initTable(operationType, instantTime, config.isMetadataTableEnabled()); + } + /** * Sets write schema from last instant since deletes may not have schema set in the config. */ @@ -1347,8 +1505,8 @@ protected void setWriteSchemaForDeletes(HoodieTableMetaClient metaClient) { if (lastInstant.isPresent()) { HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( activeTimeline.getInstantDetails(lastInstant.get()).get(), HoodieCommitMetadata.class); - if (commitMetadata.getExtraMetadata().containsKey(HoodieCommitMetadata.SCHEMA_KEY)) { - config.setSchema(commitMetadata.getExtraMetadata().get(HoodieCommitMetadata.SCHEMA_KEY)); + if (commitMetadata.getExtraMetadata().containsKey(SCHEMA_KEY)) { + config.setSchema(commitMetadata.getExtraMetadata().get(SCHEMA_KEY)); } else { throw new HoodieIOException("Latest commit does not have any schema in commit metadata"); } @@ -1410,4 +1568,138 @@ private void tryUpgrade(HoodieTableMetaClient metaClient, Option instant metaClient.reloadActiveTimeline(); } } + + /** + * add columns to table. + * + * @param colName col name to be added. if we want to add col to a nested filed, the fullName should be specify + * @param schema col type to be added. + * @param doc col doc to be added. + * @param position col position to be added + * @param positionType col position change type. 
now support three change types: first/after/before + */ + public void addColumn(String colName, Schema schema, String doc, String position, TableChange.ColumnPositionChange.ColumnPositionType positionType) { + Pair pair = getInternalSchemaAndMetaClient(); + InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()) + .applyAddChange(colName, AvroInternalSchemaConverter.convertToField(schema), doc, position, positionType); + commitTableChange(newSchema, pair.getRight()); + } + + public void addColumn(String colName, Schema schema) { + addColumn(colName, schema, null, "", TableChange.ColumnPositionChange.ColumnPositionType.NO_OPERATION); + } + + /** + * delete columns to table. + * + * @param colNames col name to be deleted. if we want to delete col from a nested filed, the fullName should be specify + */ + public void deleteColumns(String... colNames) { + Pair pair = getInternalSchemaAndMetaClient(); + InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyDeleteChange(colNames); + commitTableChange(newSchema, pair.getRight()); + } + + /** + * rename col name for hudi table. + * + * @param colName col name to be renamed. if we want to rename col from a nested filed, the fullName should be specify + * @param newName new name for current col. no need to specify fullName. + */ + public void renameColumn(String colName, String newName) { + Pair pair = getInternalSchemaAndMetaClient(); + InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyRenameChange(colName, newName); + commitTableChange(newSchema, pair.getRight()); + } + + /** + * update col nullable attribute for hudi table. + * + * @param colName col name to be changed. if we want to change col from a nested filed, the fullName should be specify + * @param nullable . 
+ */ + public void updateColumnNullability(String colName, boolean nullable) { + Pair pair = getInternalSchemaAndMetaClient(); + InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyColumnNullabilityChange(colName, nullable); + commitTableChange(newSchema, pair.getRight()); + } + + /** + * update col Type for hudi table. + * only support update primitive type to primitive type. + * cannot update nest type to nest type or primitive type eg: RecordType -> MapType, MapType -> LongType. + * + * @param colName col name to be changed. if we want to change col from a nested filed, the fullName should be specify + * @param newType . + */ + public void updateColumnType(String colName, Type newType) { + Pair pair = getInternalSchemaAndMetaClient(); + InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyColumnTypeChange(colName, newType); + commitTableChange(newSchema, pair.getRight()); + } + + /** + * update col comment for hudi table. + * + * @param colName col name to be changed. if we want to change col from a nested filed, the fullName should be specify + * @param doc . + */ + public void updateColumnComment(String colName, String doc) { + Pair pair = getInternalSchemaAndMetaClient(); + InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyColumnCommentChange(colName, doc); + commitTableChange(newSchema, pair.getRight()); + } + + /** + * reorder the position of col. + * + * @param colName column which need to be reordered. if we want to change col from a nested filed, the fullName should be specify. + * @param referColName reference position. + * @param orderType col position change type. 
now support three change types: first/after/before + */ + public void reOrderColPosition(String colName, String referColName, TableChange.ColumnPositionChange.ColumnPositionType orderType) { + if (colName == null || orderType == null || referColName == null) { + return; + } + //get internalSchema + Pair pair = getInternalSchemaAndMetaClient(); + InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()) + .applyReOrderColPositionChange(colName, referColName, orderType); + commitTableChange(newSchema, pair.getRight()); + } + + private Pair getInternalSchemaAndMetaClient() { + HoodieTableMetaClient metaClient = createMetaClient(true); + TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient); + Option internalSchemaOption = schemaUtil.getTableInternalSchemaFromCommitMetadata(); + if (!internalSchemaOption.isPresent()) { + throw new HoodieException(String.format("cannot find schema for current table: %s", config.getBasePath())); + } + return Pair.of(internalSchemaOption.get(), metaClient); + } + + private void commitTableChange(InternalSchema newSchema, HoodieTableMetaClient metaClient) { + TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient); + String historySchemaStr = schemaUtil.getTableHistorySchemaStrFromCommitMetadata().orElse(""); + Schema schema = AvroInternalSchemaConverter.convert(newSchema, config.getTableName()); + String commitActionType = CommitUtils.getCommitActionType(WriteOperationType.ALTER_SCHEMA, metaClient.getTableType()); + String instantTime = HoodieActiveTimeline.createNewInstantTime(); + startCommitWithTime(instantTime, commitActionType, metaClient); + config.setSchema(schema.toString()); + HoodieActiveTimeline timeLine = metaClient.getActiveTimeline(); + HoodieInstant requested = new HoodieInstant(State.REQUESTED, commitActionType, instantTime); + HoodieCommitMetadata metadata = new HoodieCommitMetadata(); + metadata.setOperationType(WriteOperationType.ALTER_SCHEMA); + try { + 
timeLine.transitionRequestedToInflight(requested, Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + } catch (IOException io) { + throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", io); + } + Map extraMeta = new HashMap<>(); + extraMeta.put(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(newSchema.setSchemaId(Long.getLong(instantTime)))); + // try to save history schemas + FileBasedInternalSchemaStorageManager schemasManager = new FileBasedInternalSchemaStorageManager(metaClient); + schemasManager.persistHistorySchemaStr(instantTime, SerDeHelper.inheritSchemas(newSchema, historySchemaStr)); + commitStats(instantTime, Collections.EMPTY_LIST, Option.of(extraMeta), commitActionType); + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java index 66c89cfdc014a..ca76e4e3bf3ba 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java @@ -21,6 +21,7 @@ import org.apache.hudi.avro.model.HoodieArchivedMetaEntry; import org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan; +import org.apache.hudi.client.transaction.TransactionManager; import org.apache.hudi.client.utils.MetadataConversionUtils; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; @@ -71,6 +72,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -96,6 +98,7 @@ public class HoodieTimelineArchiver { private final int minInstantsToKeep; private final HoodieTable table; private final HoodieTableMetaClient metaClient; + private final 
TransactionManager txnManager; public HoodieTimelineArchiver(HoodieWriteConfig config, HoodieTable table) { this.config = config; @@ -104,6 +107,7 @@ public HoodieTimelineArchiver(HoodieWriteConfig config, HoodieTable this.archiveFilePath = HoodieArchivedTimeline.getArchiveLogPath(metaClient.getArchivePath()); this.maxInstantsToKeep = config.getMaxCommitsToKeep(); this.minInstantsToKeep = config.getMinCommitsToKeep(); + this.txnManager = new TransactionManager(config, table.getMetaClient().getFs()); } private Writer openWriter() { @@ -143,11 +147,18 @@ private void close() { } } + public boolean archiveIfRequired(HoodieEngineContext context) throws IOException { + return archiveIfRequired(context, false); + } + /** * Check if commits need to be archived. If yes, archive commits. */ - public boolean archiveIfRequired(HoodieEngineContext context) throws IOException { + public boolean archiveIfRequired(HoodieEngineContext context, boolean acquireLock) throws IOException { try { + if (acquireLock) { + txnManager.beginTransaction(); + } List instantsToArchive = getInstantsToArchive().collect(Collectors.toList()); verifyLastMergeArchiveFilesIfNecessary(context); boolean success = true; @@ -167,6 +178,9 @@ public boolean archiveIfRequired(HoodieEngineContext context) throws IOException return success; } finally { close(); + if (acquireLock) { + txnManager.endTransaction(); + } } } @@ -325,7 +339,7 @@ public void mergeArchiveFiles(List compactCandidate) throws IOExcept // Read the avro blocks while (reader.hasNext()) { HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - blk.getRecordItr().forEachRemaining(records::add); + blk.getRecordIterator().forEachRemaining(records::add); if (records.size() >= this.config.getCommitArchivalBatchSize()) { writeToFile(wrapperSchema, records); } @@ -469,10 +483,32 @@ private Stream getInstantsToArchive() { throw new HoodieException("Error limiting instant archival based on metadata table", e); } } - - return 
instants.flatMap(hoodieInstant -> - groupByTsAction.get(Pair.of(hoodieInstant.getTimestamp(), - HoodieInstant.getComparableAction(hoodieInstant.getAction()))).stream()); + + // If this is a metadata table, do not archive the commits that live in data set + // active timeline. This is required by metadata table, + // see HoodieTableMetadataUtil#processRollbackMetadata for details. + if (HoodieTableMetadata.isMetadataTable(config.getBasePath())) { + HoodieTableMetaClient dataMetaClient = HoodieTableMetaClient.builder() + .setBasePath(HoodieTableMetadata.getDatasetBasePath(config.getBasePath())) + .setConf(metaClient.getHadoopConf()) + .build(); + Option earliestActiveDatasetCommit = dataMetaClient.getActiveTimeline().firstInstant().map(HoodieInstant::getTimestamp); + if (earliestActiveDatasetCommit.isPresent()) { + instants = instants.filter(instant -> + HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN, earliestActiveDatasetCommit.get())); + } + } + + return instants.flatMap(hoodieInstant -> { + List instantsToStream = groupByTsAction.get(Pair.of(hoodieInstant.getTimestamp(), + HoodieInstant.getComparableAction(hoodieInstant.getAction()))); + if (instantsToStream != null) { + return instantsToStream.stream(); + } else { + // if a concurrent writer archived the instant + return Collections.EMPTY_LIST.stream(); + } + }); } private boolean deleteArchivedInstants(List archivedInstants, HoodieEngineContext context) throws IOException { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/ConcurrentOperation.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/ConcurrentOperation.java index e78a157a5fd16..40da7dca7fcbb 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/ConcurrentOperation.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/ConcurrentOperation.java @@ -18,7 +18,6 @@ 
package org.apache.hudi.client.transaction; -import java.io.IOException; import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; import org.apache.hudi.client.utils.MetadataConversionUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -27,15 +26,18 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.CommitUtils; +import org.apache.hudi.common.util.Option; + +import java.io.IOException; import java.util.Collections; import java.util.Set; import java.util.stream.Collectors; -import org.apache.hudi.common.util.Option; import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.DELTA_COMMIT_ACTION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.REPLACE_COMMIT_ACTION; +import static org.apache.hudi.common.util.CommitUtils.getFileIdWithoutSuffixAndRelativePathsFromSpecificRecord; /** * This class is used to hold all information used to identify how to resolve conflicts between instants. 
@@ -52,7 +54,7 @@ public class ConcurrentOperation { private final String instantTime; private Set mutatedFileIds = Collections.EMPTY_SET; - public ConcurrentOperation(HoodieInstant instant, HoodieTableMetaClient metaClient) throws IOException { + public ConcurrentOperation(HoodieInstant instant, HoodieTableMetaClient metaClient) throws IOException { this.metadataWrapper = new HoodieMetadataWrapper(MetadataConversionUtils.createMetaWrapper(instant, metaClient)); this.commitMetadataOption = Option.empty(); this.actionState = instant.getState().name(); @@ -106,24 +108,37 @@ private void init(HoodieInstant instant) { break; case COMMIT_ACTION: case DELTA_COMMIT_ACTION: - this.mutatedFileIds = CommitUtils.getFileIdWithoutSuffixAndRelativePathsFromSpecificRecord(this.metadataWrapper.getMetadataFromTimeline().getHoodieCommitMetadata() + this.mutatedFileIds = getFileIdWithoutSuffixAndRelativePathsFromSpecificRecord(this.metadataWrapper.getMetadataFromTimeline().getHoodieCommitMetadata() .getPartitionToWriteStats()).keySet(); this.operationType = WriteOperationType.fromValue(this.metadataWrapper.getMetadataFromTimeline().getHoodieCommitMetadata().getOperationType()); break; case REPLACE_COMMIT_ACTION: if (instant.isCompleted()) { - this.mutatedFileIds = CommitUtils.getFileIdWithoutSuffixAndRelativePathsFromSpecificRecord( + this.mutatedFileIds = getFileIdWithoutSuffixAndRelativePathsFromSpecificRecord( this.metadataWrapper.getMetadataFromTimeline().getHoodieReplaceCommitMetadata().getPartitionToWriteStats()).keySet(); this.operationType = WriteOperationType.fromValue(this.metadataWrapper.getMetadataFromTimeline().getHoodieReplaceCommitMetadata().getOperationType()); } else { + // we need to have different handling for requested and inflight replacecommit because + // for requested replacecommit, clustering will generate a plan and HoodieRequestedReplaceMetadata will not be empty, but insert_overwrite/insert_overwrite_table could have empty content + // for inflight 
replacecommit, clustering will have no content in metadata, but insert_overwrite/insert_overwrite_table will have some commit metadata HoodieRequestedReplaceMetadata requestedReplaceMetadata = this.metadataWrapper.getMetadataFromTimeline().getHoodieRequestedReplaceMetadata(); - this.mutatedFileIds = requestedReplaceMetadata - .getClusteringPlan().getInputGroups() - .stream() - .flatMap(ig -> ig.getSlices().stream()) - .map(file -> file.getFileId()) - .collect(Collectors.toSet()); - this.operationType = WriteOperationType.CLUSTER; + org.apache.hudi.avro.model.HoodieCommitMetadata inflightCommitMetadata = this.metadataWrapper.getMetadataFromTimeline().getHoodieInflightReplaceMetadata(); + if (instant.isRequested()) { + if (requestedReplaceMetadata != null) { + this.mutatedFileIds = getFileIdsFromRequestedReplaceMetadata(requestedReplaceMetadata); + this.operationType = WriteOperationType.CLUSTER; + } + } else { + if (inflightCommitMetadata != null) { + this.mutatedFileIds = getFileIdWithoutSuffixAndRelativePathsFromSpecificRecord(inflightCommitMetadata.getPartitionToWriteStats()).keySet(); + this.operationType = WriteOperationType.fromValue(this.metadataWrapper.getMetadataFromTimeline().getHoodieCommitMetadata().getOperationType()); + } else if (requestedReplaceMetadata != null) { + // inflight replacecommit metadata is empty due to clustering, read fileIds from requested replacecommit + this.mutatedFileIds = getFileIdsFromRequestedReplaceMetadata(requestedReplaceMetadata); + this.operationType = WriteOperationType.CLUSTER; + } + // NOTE: it cannot be the case that instant is inflight, and both the requested and inflight replacecommit metadata are empty + } } break; default: @@ -142,6 +157,15 @@ private void init(HoodieInstant instant) { } } + private static Set getFileIdsFromRequestedReplaceMetadata(HoodieRequestedReplaceMetadata requestedReplaceMetadata) { + return requestedReplaceMetadata + .getClusteringPlan().getInputGroups() + .stream() + .flatMap(ig -> 
ig.getSlices().stream()) + .map(file -> file.getFileId()) + .collect(Collectors.toSet()); + } + @Override public String toString() { return "{" diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/ZookeeperBasedLockProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/ZookeeperBasedLockProvider.java index 8a1c7c0e96d49..fc5b7a75f7f60 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/ZookeeperBasedLockProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/ZookeeperBasedLockProvider.java @@ -18,22 +18,25 @@ package org.apache.hudi.client.transaction.lock; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.framework.imps.CuratorFrameworkState; -import org.apache.curator.framework.recipes.locks.InterProcessMutex; -import org.apache.curator.retry.BoundedExponentialBackoffRetry; -import org.apache.hadoop.conf.Configuration; import org.apache.hudi.common.config.LockConfiguration; import org.apache.hudi.common.lock.LockProvider; import org.apache.hudi.common.lock.LockState; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieLockException; + +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.framework.imps.CuratorFrameworkState; +import org.apache.curator.framework.recipes.locks.InterProcessMutex; +import org.apache.curator.retry.BoundedExponentialBackoffRetry; +import org.apache.hadoop.conf.Configuration; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import javax.annotation.concurrent.NotThreadSafe; + +import java.io.Serializable; import java.util.concurrent.TimeUnit; import static 
org.apache.hudi.common.config.LockConfiguration.DEFAULT_ZK_CONNECTION_TIMEOUT_MS; @@ -52,11 +55,11 @@ * using zookeeper. Users need to have a Zookeeper cluster deployed to be able to use this lock. */ @NotThreadSafe -public class ZookeeperBasedLockProvider implements LockProvider { +public class ZookeeperBasedLockProvider implements LockProvider, Serializable { private static final Logger LOG = LogManager.getLogger(ZookeeperBasedLockProvider.class); - private final CuratorFramework curatorFrameworkClient; + private final transient CuratorFramework curatorFrameworkClient; private volatile InterProcessMutex lock = null; protected LockConfiguration lockConfiguration; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/MetadataConversionUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/MetadataConversionUtils.java index 8a9d0b3204e74..d588a9c5dd0c9 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/MetadataConversionUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/MetadataConversionUtils.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; - import org.apache.hudi.avro.model.HoodieArchivedMetaEntry; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; @@ -34,6 +33,7 @@ import org.apache.hudi.common.model.HoodieRollingStatMetadata; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; @@ -146,6 +146,19 @@ private static Option 
getRequestedReplaceMetadat return Option.of(TimelineMetadataUtils.deserializeRequestedReplaceMetadata(requestedContent.get())); } + public static Option getHoodieCommitMetadata(HoodieTableMetaClient metaClient, HoodieInstant hoodieInstant) throws IOException { + HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants(); + + if (hoodieInstant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) { + return Option.of(HoodieReplaceCommitMetadata.fromBytes(timeline.getInstantDetails(hoodieInstant).get(), + HoodieReplaceCommitMetadata.class)); + } + return Option.of(HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(hoodieInstant).get(), + HoodieCommitMetadata.class)); + + } + public static org.apache.hudi.avro.model.HoodieCommitMetadata convertCommitMetadata( HoodieCommitMetadata hoodieCommitMetadata) { ObjectMapper mapper = new ObjectMapper(); @@ -160,4 +173,4 @@ public static org.apache.hudi.avro.model.HoodieCommitMetadata convertCommitMetad avroMetaData.getExtraMetadata().put(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY, ""); return avroMetaData; } -} \ No newline at end of file +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java index 9d7683128fc8c..ec15effdc4663 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.collection.Pair; import 
org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIOException; @@ -36,7 +37,9 @@ import org.apache.log4j.Logger; import java.io.IOException; +import java.util.Set; import java.util.Map; +import java.util.stream.Collectors; import java.util.stream.Stream; public class TransactionUtils { @@ -51,26 +54,8 @@ public class TransactionUtils { * @param thisCommitMetadata * @param config * @param lastCompletedTxnOwnerInstant - * @return - * @throws HoodieWriteConflictException - */ - public static Option resolveWriteConflictIfAny( - final HoodieTable table, - final Option currentTxnOwnerInstant, - final Option thisCommitMetadata, - final HoodieWriteConfig config, - Option lastCompletedTxnOwnerInstant) throws HoodieWriteConflictException { - return resolveWriteConflictIfAny(table, currentTxnOwnerInstant, thisCommitMetadata, config, lastCompletedTxnOwnerInstant, false); - } - - /** - * Resolve any write conflicts when committing data. + * @param pendingInstants * - * @param table - * @param currentTxnOwnerInstant - * @param thisCommitMetadata - * @param config - * @param lastCompletedTxnOwnerInstant * @return * @throws HoodieWriteConflictException */ @@ -80,11 +65,16 @@ public static Option resolveWriteConflictIfAny( final Option thisCommitMetadata, final HoodieWriteConfig config, Option lastCompletedTxnOwnerInstant, - boolean reloadActiveTimeline) throws HoodieWriteConflictException { + boolean reloadActiveTimeline, + Set pendingInstants) throws HoodieWriteConflictException { if (config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl()) { + // deal with pendingInstants + Stream completedInstantsDuringCurrentWriteOperation = getCompletedInstantsDuringCurrentWriteOperation(table.getMetaClient(), pendingInstants); + ConflictResolutionStrategy resolutionStrategy = config.getWriteConflictResolutionStrategy(); - Stream instantStream = resolutionStrategy.getCandidateInstants(reloadActiveTimeline - ? 
table.getMetaClient().reloadActiveTimeline() : table.getActiveTimeline(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant); + Stream instantStream = Stream.concat(resolutionStrategy.getCandidateInstants(reloadActiveTimeline + ? table.getMetaClient().reloadActiveTimeline() : table.getActiveTimeline(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant), + completedInstantsDuringCurrentWriteOperation); final ConcurrentOperation thisOperation = new ConcurrentOperation(currentTxnOwnerInstant.get(), thisCommitMetadata.orElse(new HoodieCommitMetadata())); instantStream.forEach(instant -> { try { @@ -137,4 +127,35 @@ public static Option>> getLastCompletedT throw new HoodieIOException("Unable to read metadata for instant " + hoodieInstantOption.get(), io); } } -} \ No newline at end of file + + /** + * Get InflightAndRequest instants. + * + * @param metaClient + * @return + */ + public static Set getInflightAndRequestedInstants(HoodieTableMetaClient metaClient) { + // collect InflightAndRequest instants for deltaCommit/commit/compaction/clustering + Set timelineActions = CollectionUtils + .createImmutableSet(HoodieTimeline.REPLACE_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMMIT_ACTION); + return metaClient + .getActiveTimeline() + .getTimelineOfActions(timelineActions) + .filterInflightsAndRequested() + .getInstants() + .map(HoodieInstant::getTimestamp) + .collect(Collectors.toSet()); + } + + public static Stream getCompletedInstantsDuringCurrentWriteOperation(HoodieTableMetaClient metaClient, Set pendingInstants) { + // deal with pendingInstants + // some pending instants maybe finished during current write operation, + // we should check the conflict of those pending operation + return metaClient + .reloadActiveTimeline() + .getCommitsTimeline() + .filterCompletedInstants() + .getInstants() + .filter(f -> pendingInstants.contains(f.getTimestamp())); + } +} diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java index f82f14d5a9c64..7c1f7e00e7fb1 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java @@ -115,6 +115,14 @@ public class HoodieIndexConfig extends HoodieConfig { + "When true, the input RDD will cached to speed up index lookup by reducing IO " + "for computing parallelism or affected partitions"); + public static final ConfigProperty BLOOM_INDEX_USE_METADATA = ConfigProperty + .key("hoodie.bloom.index.use.metadata") + .defaultValue(false) + .sinceVersion("0.11.0") + .withDocumentation("Only applies if index type is BLOOM." + + "When true, the index lookup uses bloom filters and column stats from metadata " + + "table when available to speed up the process."); + public static final ConfigProperty BLOOM_INDEX_TREE_BASED_FILTER = ConfigProperty .key("hoodie.bloom.index.use.treebased.filter") .defaultValue("true") @@ -490,6 +498,11 @@ public Builder bloomIndexUseCaching(boolean useCaching) { return this; } + public Builder bloomIndexUseMetadata(boolean useMetadata) { + hoodieIndexConfig.setValue(BLOOM_INDEX_USE_METADATA, String.valueOf(useMetadata)); + return this; + } + public Builder bloomIndexTreebasedFilter(boolean useTreeFilter) { hoodieIndexConfig.setValue(BLOOM_INDEX_TREE_BASED_FILTER, String.valueOf(useTreeFilter)); return this; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 776c9066767c9..d861ffe970c80 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -167,6 +167,22 @@ public class HoodieWriteConfig extends HoodieConfig { + "implementations of HoodieRecordPayload to convert incoming records to avro. This is also used as the write schema " + "evolving records during an update."); + public static final ConfigProperty INTERNAL_SCHEMA_STRING = ConfigProperty + .key("hoodie.internal.schema") + .noDefaultValue() + .withDocumentation("Schema string representing the latest schema of the table. Hudi passes this to " + + "implementations of evolution of schema"); + + public static final ConfigProperty SCHEMA_EVOLUTION_ENABLE = ConfigProperty + .key("hoodie.schema.on.read.enable") + .defaultValue(false) + .withDocumentation("enable full schema evolution for hoodie"); + + public static final ConfigProperty ENABLE_INTERNAL_SCHEMA_CACHE = ConfigProperty + .key("hoodie.schema.cache.enable") + .defaultValue(false) + .withDocumentation("cache query internalSchemas in driver/executor side"); + public static final ConfigProperty AVRO_SCHEMA_VALIDATE_ENABLE = ConfigProperty .key("hoodie.avro.schema.validate") .defaultValue("false") @@ -464,6 +480,12 @@ public class HoodieWriteConfig extends HoodieConfig { .sinceVersion("0.11.0") .withDocumentation("Control to enable release all persist rdds when the spark job finish."); + public static final ConfigProperty AUTO_ADJUST_LOCK_CONFIGS = ConfigProperty + .key("hoodie.auto.adjust.lock.configs") + .defaultValue(false) + .sinceVersion("0.11.0") + .withDocumentation("Auto adjust lock configurations when metadata table is enabled and for async table services."); + private ConsistencyGuardConfig consistencyGuardConfig; private FileSystemRetryConfig fileSystemRetryConfig; @@ -886,6 +908,30 @@ public void setSchema(String schemaStr) { setValue(AVRO_SCHEMA_STRING, schemaStr); } + public String getInternalSchema() { + return getString(INTERNAL_SCHEMA_STRING); + } + + public boolean 
getInternalSchemaCacheEnable() { + return getBoolean(ENABLE_INTERNAL_SCHEMA_CACHE); + } + + public void setInternalSchemaString(String internalSchemaString) { + setValue(INTERNAL_SCHEMA_STRING, internalSchemaString); + } + + public void setInternalSchemaCacheEnable(boolean enable) { + setValue(ENABLE_INTERNAL_SCHEMA_CACHE, String.valueOf(enable)); + } + + public boolean getSchemaEvolutionEnable() { + return getBoolean(SCHEMA_EVOLUTION_ENABLE); + } + + public void setSchemaEvolutionEnable(boolean enable) { + setValue(SCHEMA_EVOLUTION_ENABLE, String.valueOf(enable)); + } + /** * Get the write schema for written records. * @@ -1495,6 +1541,10 @@ public boolean getBloomIndexUseCaching() { return getBoolean(HoodieIndexConfig.BLOOM_INDEX_USE_CACHING); } + public boolean getBloomIndexUseMetadata() { + return getBooleanOrDefault(HoodieIndexConfig.BLOOM_INDEX_USE_METADATA); + } + public boolean useBloomIndexTreebasedFilter() { return getBoolean(HoodieIndexConfig.BLOOM_INDEX_TREE_BASED_FILTER); } @@ -1507,8 +1557,24 @@ public boolean isMetadataBloomFilterIndexEnabled() { return isMetadataTableEnabled() && getMetadataConfig().isBloomFilterIndexEnabled(); } - public boolean isMetadataIndexColumnStatsForAllColumnsEnabled() { - return isMetadataTableEnabled() && getMetadataConfig().isMetadataColumnStatsIndexForAllColumnsEnabled(); + public boolean isMetadataColumnStatsIndexEnabled() { + return isMetadataTableEnabled() && getMetadataConfig().isColumnStatsIndexEnabled(); + } + + public List getColumnsEnabledForColumnStatsIndex() { + return getMetadataConfig().getColumnsEnabledForColumnStatsIndex(); + } + + public List getColumnsEnabledForBloomFilterIndex() { + return getMetadataConfig().getColumnsEnabledForBloomFilterIndex(); + } + + public int getIndexingCheckTimeoutSeconds() { + return getMetadataConfig().getIndexingCheckTimeoutSeconds(); + } + + public int getMetadataBloomFilterIndexParallelism() { + return metadataConfig.getBloomFilterIndexParallelism(); } public int 
getColumnStatsIndexParallelism() { @@ -1892,6 +1958,10 @@ public boolean isMetadataAsyncClean() { return getBoolean(HoodieMetadataConfig.ASYNC_CLEAN_ENABLE); } + public boolean isMetadataAsyncIndex() { + return getBooleanOrDefault(HoodieMetadataConfig.ASYNC_INDEX_ENABLE); + } + public int getMetadataMaxCommitsToKeep() { return getInt(HoodieMetadataConfig.MAX_COMMITS_TO_KEEP); } @@ -1908,6 +1978,9 @@ public int getMetadataCleanerCommitsRetained() { * Hoodie Client Lock Configs. * @return */ + public boolean isAutoAdjustLockConfigs() { + return getBooleanOrDefault(AUTO_ADJUST_LOCK_CONFIGS); + } public String getLockProviderClass() { return getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME); @@ -2059,6 +2132,16 @@ public Builder withSchema(String schemaStr) { return this; } + public Builder withSchemaEvolutionEnable(boolean enable) { + writeConfig.setValue(SCHEMA_EVOLUTION_ENABLE, String.valueOf(enable)); + return this; + } + + public Builder withInternalSchemaCacheEnable(boolean enable) { + writeConfig.setValue(ENABLE_INTERNAL_SCHEMA_CACHE, String.valueOf(enable)); + return this; + } + public Builder withAvroSchemaValidate(boolean enable) { writeConfig.setValue(AVRO_SCHEMA_VALIDATE_ENABLE, String.valueOf(enable)); return this; @@ -2373,6 +2456,11 @@ public Builder withProperties(Properties properties) { return this; } + public Builder withAutoAdjustLockConfigs(boolean autoAdjustLockConfigs) { + writeConfig.setValue(AUTO_ADJUST_LOCK_CONFIGS, String.valueOf(autoAdjustLockConfigs)); + return this; + } + protected void setDefaults() { writeConfig.setDefaultValue(MARKERS_TYPE, getDefaultMarkersType(engineType)); // Check for mandatory properties @@ -2410,41 +2498,42 @@ protected void setDefaults() { HoodieLayoutConfig.newBuilder().fromProperties(writeConfig.getProps()).build()); writeConfig.setDefaultValue(TIMELINE_LAYOUT_VERSION_NUM, String.valueOf(TimelineLayoutVersion.CURR_VERSION)); - autoAdjustConfigsForConcurrencyMode(); - } - - private void 
autoAdjustConfigsForConcurrencyMode() { - boolean isMetadataTableEnabled = writeConfig.getBoolean(HoodieMetadataConfig.ENABLE); + // isLockProviderPropertySet must be fetched before setting defaults of HoodieLockConfig final TypedProperties writeConfigProperties = writeConfig.getProps(); final boolean isLockProviderPropertySet = writeConfigProperties.containsKey(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME) || writeConfigProperties.containsKey(HoodieLockConfig.LOCK_PROVIDER_CLASS_PROP); - - if (!isLockConfigSet) { - HoodieLockConfig.Builder lockConfigBuilder = HoodieLockConfig.newBuilder().fromProperties(writeConfig.getProps()); - writeConfig.setDefault(lockConfigBuilder.build()); - } - - if (isMetadataTableEnabled) { - // When metadata table is enabled, optimistic concurrency control must be used for - // single writer with async table services. - // Async table services can update the metadata table and a lock provider is - // needed to guard against any concurrent table write operations. If user has - // not configured any lock provider, let's use the InProcess lock provider. 
- boolean areTableServicesEnabled = writeConfig.areTableServicesEnabled(); - boolean areAsyncTableServicesEnabled = writeConfig.areAnyTableServicesAsync(); - - if (!isLockProviderPropertySet && areTableServicesEnabled && areAsyncTableServicesEnabled) { - // This is targeted at Single writer with async table services - // If user does not set the lock provider, likely that the concurrency mode is not set either - // Override the configs for metadata table - writeConfig.setValue(WRITE_CONCURRENCY_MODE.key(), - WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value()); - writeConfig.setValue(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), - InProcessLockProvider.class.getName()); - LOG.info(String.format("Automatically set %s=%s and %s=%s since user has not set the " - + "lock provider for single writer with async table services", - WRITE_CONCURRENCY_MODE.key(), WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value(), - HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), InProcessLockProvider.class.getName())); + writeConfig.setDefaultOnCondition(!isLockConfigSet, + HoodieLockConfig.newBuilder().fromProperties(writeConfig.getProps()).build()); + + autoAdjustConfigsForConcurrencyMode(isLockProviderPropertySet); + } + + private void autoAdjustConfigsForConcurrencyMode(boolean isLockProviderPropertySet) { + if (writeConfig.isAutoAdjustLockConfigs()) { + // auto adjustment is required only for deltastreamer and spark streaming where async table services can be executed in the same JVM. + boolean isMetadataTableEnabled = writeConfig.getBoolean(HoodieMetadataConfig.ENABLE); + + if (isMetadataTableEnabled) { + // When metadata table is enabled, optimistic concurrency control must be used for + // single writer with async table services. + // Async table services can update the metadata table and a lock provider is + // needed to guard against any concurrent table write operations. If user has + // not configured any lock provider, let's use the InProcess lock provider. 
+ boolean areTableServicesEnabled = writeConfig.areTableServicesEnabled(); + boolean areAsyncTableServicesEnabled = writeConfig.areAnyTableServicesAsync(); + if (!isLockProviderPropertySet && areTableServicesEnabled && areAsyncTableServicesEnabled) { + // This is targeted at Single writer with async table services + // If user does not set the lock provider, likely that the concurrency mode is not set either + // Override the configs for metadata table + writeConfig.setValue(WRITE_CONCURRENCY_MODE.key(), + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value()); + writeConfig.setValue(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + InProcessLockProvider.class.getName()); + LOG.info(String.format("Automatically set %s=%s and %s=%s since user has not set the " + + "lock provider for single writer with async table services", + WRITE_CONCURRENCY_MODE.key(), WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value(), + HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), InProcessLockProvider.class.getName())); + } } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsConfig.java index 8396540394b86..85f98935fd3c3 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/metrics/HoodieMetricsConfig.java @@ -181,7 +181,7 @@ public HoodieMetricsConfig build() { hoodieMetricsConfig.setDefaultOnCondition(reporterType == MetricsReporterType.GRAPHITE, HoodieMetricsGraphiteConfig.newBuilder().fromProperties(hoodieMetricsConfig.getProps()).build()); hoodieMetricsConfig.setDefaultOnCondition(reporterType == MetricsReporterType.CLOUDWATCH, - HoodieMetricsCloudWatchConfig.newBuilder().fromProperties(hoodieMetricsConfig.getProps()).build()); + 
HoodieMetricsCloudWatchConfig.newBuilder().fromProperties(hoodieMetricsConfig.getProps()).build()); return hoodieMetricsConfig; } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/exception/HoodieDeletePartitionException.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/exception/HoodieDeletePartitionException.java new file mode 100644 index 0000000000000..34eb734b32423 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/exception/HoodieDeletePartitionException.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.exception; + +/** + *

+ * Exception thrown for any higher level errors when doing delete partitions. + *

+ */ +public class HoodieDeletePartitionException extends HoodieException { + + public HoodieDeletePartitionException(String msg, Throwable e) { + super(msg, e); + } + + public HoodieDeletePartitionException(String msg) { + super(msg); + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java index d3e73c058cc56..aeaf78672680d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java @@ -53,7 +53,11 @@ import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.mapping; import static java.util.stream.Collectors.toList; +import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty; import static org.apache.hudi.index.HoodieIndexUtils.getLatestBaseFilesForAllPartitions; +import static org.apache.hudi.metadata.HoodieMetadataPayload.unwrapStatisticValueWrapper; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; +import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS; /** * Indexing mechanism based on bloom filter. Each parquet file includes its row_key bloom filter in its metadata. @@ -118,14 +122,7 @@ private HoodiePairData lookupIndex( List affectedPartitionPathList = new ArrayList<>(recordsPerPartition.keySet()); // Step 2: Load all involved files as pairs - List> fileInfoList; - if (config.getBloomIndexPruneByRanges()) { - fileInfoList = (config.getMetadataConfig().isColumnStatsIndexEnabled() - ? 
loadColumnRangesFromMetaIndex(affectedPartitionPathList, context, hoodieTable) - : loadColumnRangesFromFiles(affectedPartitionPathList, context, hoodieTable)); - } else { - fileInfoList = getFileInfoForLatestBaseFiles(affectedPartitionPathList, context, hoodieTable); - } + List> fileInfoList = getBloomIndexFileInfoForPartitions(context, hoodieTable, affectedPartitionPathList); final Map> partitionToFileInfo = fileInfoList.stream().collect(groupingBy(Pair::getLeft, mapping(Pair::getRight, toList()))); @@ -138,6 +135,28 @@ private HoodiePairData lookupIndex( partitionRecordKeyPairs, fileComparisonPairs, partitionToFileInfo, recordsPerPartition); } + private List> getBloomIndexFileInfoForPartitions(HoodieEngineContext context, + HoodieTable hoodieTable, + List affectedPartitionPathList) { + List> fileInfoList = new ArrayList<>(); + + if (config.getBloomIndexPruneByRanges()) { + // load column ranges from metadata index if column stats index is enabled and column_stats metadata partition is available + if (config.getBloomIndexUseMetadata() + && getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig()).contains(COLUMN_STATS.getPartitionPath())) { + fileInfoList = loadColumnRangesFromMetaIndex(affectedPartitionPathList, context, hoodieTable); + } + // fallback to loading column ranges from files + if (isNullOrEmpty(fileInfoList)) { + fileInfoList = loadColumnRangesFromFiles(affectedPartitionPathList, context, hoodieTable); + } + } else { + fileInfoList = getFileInfoForLatestBaseFiles(affectedPartitionPathList, context, hoodieTable); + } + + return fileInfoList; + } + /** * Load all involved files as pair List. 
*/ @@ -188,7 +207,7 @@ private List> getFileInfoForLatestBaseFiles( * @return List of partition and file column range info pairs */ protected List> loadColumnRangesFromMetaIndex( - List partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) { + List partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) { // also obtain file ranges, if range pruning is enabled context.setJobStatus(this.getClass().getName(), "Load meta index key ranges for file slices"); @@ -203,15 +222,16 @@ protected List> loadColumnRangesFromMetaIndex( return Stream.empty(); } try { - Map, HoodieMetadataColumnStats> fileToColumnStatsMap = hoodieTable - .getMetadataTable().getColumnStats(partitionFileNameList, keyField); + Map, HoodieMetadataColumnStats> fileToColumnStatsMap = + hoodieTable.getMetadataTable().getColumnStats(partitionFileNameList, keyField); List> result = new ArrayList<>(); for (Map.Entry, HoodieMetadataColumnStats> entry : fileToColumnStatsMap.entrySet()) { result.add(Pair.of(entry.getKey().getLeft(), new BloomIndexFileInfo( FSUtils.getFileId(entry.getKey().getRight()), - entry.getValue().getMinValue(), - entry.getValue().getMaxValue() + // NOTE: Here we assume that the type of the primary key field is string + (String) unwrapStatisticValueWrapper(entry.getValue().getMinValue()), + (String) unwrapStatisticValueWrapper(entry.getValue().getMaxValue()) ))); } return result.stream(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java index ddd95721a46b6..1a07c4063f358 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java @@ -38,11 +38,15 @@ public static int getBucketId(HoodieRecord record, String indexKeyFields, int nu } public 
static int getBucketId(HoodieKey hoodieKey, String indexKeyFields, int numBuckets) { + return getBucketId(hoodieKey.getRecordKey(), indexKeyFields, numBuckets); + } + + public static int getBucketId(String recordKey, String indexKeyFields, int numBuckets) { List hashKeyFields; - if (!hoodieKey.getRecordKey().contains(":")) { - hashKeyFields = Collections.singletonList(hoodieKey.getRecordKey()); + if (!recordKey.contains(":")) { + hashKeyFields = Collections.singletonList(recordKey); } else { - Map recordKeyPairs = Arrays.stream(hoodieKey.getRecordKey().split(",")) + Map recordKeyPairs = Arrays.stream(recordKey.split(",")) .map(p -> p.split(":")) .collect(Collectors.toMap(p -> p[0], p -> p[1])); hashKeyFields = Arrays.stream(indexKeyFields.split(",")) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java index a58e4d65d2879..7fc46e8b9bbc4 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java @@ -18,15 +18,19 @@ package org.apache.hudi.io; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.BaseFile; +import org.apache.hudi.common.model.DeleteRecord; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieColumnRangeMetadata; import org.apache.hudi.common.model.HoodieDeltaWriteStat; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieOperation; 
import org.apache.hudi.common.model.HoodiePartitionMetadata; @@ -55,25 +59,22 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.table.HoodieTable; - -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.accumulateColumnRanges; -import static org.apache.hudi.metadata.HoodieTableMetadataUtil.aggregateColumnStats; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.collectColumnRangeMetadata; /** * IO Operation to append data onto an existing file. @@ -88,7 +89,7 @@ public class HoodieAppendHandle extends // Buffer for holding records in memory before they are flushed to disk private final List recordList = new ArrayList<>(); // Buffer for holding records (to be deleted) in memory before they are flushed to disk - private final List keysToDelete = new ArrayList<>(); + private final List recordsToDelete = new ArrayList<>(); // Incoming records to be written to logs. protected Iterator> recordItr; // Writer to log into the file group's latest slice. 
@@ -171,7 +172,8 @@ private void init(HoodieRecord record) { try { // Save hoodie partition meta in the partition path HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, baseInstantTime, - new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath)); + new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath), + hoodieTable.getPartitionMetafileFormat()); partitionMetadata.trySave(getPartitionId()); // Since the actual log file written to can be different based on when rollover happens, we use the @@ -343,17 +345,24 @@ private void processAppendResult(AppendResult result, List record updateWriteStatus(stat, result); } - if (config.isMetadataIndexColumnStatsForAllColumnsEnabled()) { - Map> columnRangeMap = stat.getRecordsStats().isPresent() - ? stat.getRecordsStats().get().getStats() : new HashMap<>(); - final String filePath = stat.getPath(); - // initialize map of column name to map of stats name to stats value - Map> columnToStats = new HashMap<>(); - writeSchemaWithMetaFields.getFields().forEach(field -> columnToStats.putIfAbsent(field.name(), new HashMap<>())); - // collect stats for columns at once per record and keep iterating through every record to eventually find col stats for all fields. 
- recordList.forEach(record -> aggregateColumnStats(record, writeSchemaWithMetaFields, columnToStats, config.isConsistentLogicalTimestampEnabled())); - writeSchemaWithMetaFields.getFields().forEach(field -> accumulateColumnRanges(field, filePath, columnRangeMap, columnToStats)); - stat.setRecordsStats(new HoodieDeltaWriteStat.RecordsStats<>(columnRangeMap)); + if (config.isMetadataColumnStatsIndexEnabled()) { + final List fieldsToIndex; + // If column stats index is enabled but columns not configured then we assume that + // all columns should be indexed + if (config.getColumnsEnabledForColumnStatsIndex().isEmpty()) { + fieldsToIndex = writeSchemaWithMetaFields.getFields(); + } else { + Set columnsToIndexSet = new HashSet<>(config.getColumnsEnabledForColumnStatsIndex()); + + fieldsToIndex = writeSchemaWithMetaFields.getFields().stream() + .filter(field -> columnsToIndexSet.contains(field.name())) + .collect(Collectors.toList()); + } + + Map> columnRangesMetadataMap = + collectColumnRangeMetadata(recordList, fieldsToIndex, stat.getPath()); + + stat.setRecordsStats(columnRangesMetadataMap); } resetWriteCounts(); @@ -387,15 +396,15 @@ protected void appendDataAndDeleteBlocks(Map header) blocks.add(getBlock(config, pickLogDataBlockFormat(), recordList, header, keyField)); } - if (keysToDelete.size() > 0) { - blocks.add(new HoodieDeleteBlock(keysToDelete.toArray(new HoodieKey[keysToDelete.size()]), header)); + if (recordsToDelete.size() > 0) { + blocks.add(new HoodieDeleteBlock(recordsToDelete.toArray(new DeleteRecord[0]), header)); } if (blocks.size() > 0) { AppendResult appendResult = writer.appendBlocks(blocks); processAppendResult(appendResult, recordList); recordList.clear(); - keysToDelete.clear(); + recordsToDelete.clear(); } } catch (Exception e) { throw new HoodieAppendException("Failed while appending records to " + writer.getLogFile().getPath(), e); @@ -457,7 +466,7 @@ public List writeStatuses() { } private Writer createLogWriter(Option fileSlice, String 
baseCommitTime) - throws IOException, InterruptedException { + throws IOException { Option latestLogFile = fileSlice.get().getLatestLogFile(); return HoodieLogFormat.newWriterBuilder() @@ -492,14 +501,16 @@ private void writeToBuffer(HoodieRecord record) { record.setNewLocation(new HoodieRecordLocation(instantTime, fileId)); record.seal(); } + // fetch the ordering val first in case the record was deflated. + final Comparable orderingVal = record.getData().getOrderingValue(); Option indexedRecord = getIndexedRecord(record); if (indexedRecord.isPresent()) { - // Skip the Ignore Record. + // Skip the ignored record. if (!indexedRecord.get().equals(IGNORE_RECORD)) { recordList.add(indexedRecord.get()); } } else { - keysToDelete.add(record.getKey()); + recordsToDelete.add(DeleteRecord.create(record.getKey(), orderingVal)); } numberOfRecords++; } @@ -548,7 +559,8 @@ private static HoodieLogBlock getBlock(HoodieWriteConfig writeConfig, case AVRO_DATA_BLOCK: return new HoodieAvroDataBlock(recordList, header, keyField); case HFILE_DATA_BLOCK: - return new HoodieHFileDataBlock(recordList, header, writeConfig.getHFileCompressionAlgorithm()); + return new HoodieHFileDataBlock( + recordList, header, writeConfig.getHFileCompressionAlgorithm(), new Path(writeConfig.getBasePath())); case PARQUET_DATA_BLOCK: return new HoodieParquetDataBlock(recordList, header, keyField, writeConfig.getParquetCompressionCodec()); default: diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java index 3e7e0b16e2cf8..91a7622bf8065 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java @@ -59,7 +59,7 @@ public class HoodieCreateHandle extends protected long recordsDeleted = 0; private Map> recordMap; private boolean useWriterSchema 
= false; - private boolean preserveHoodieMetadata = false; + private final boolean preserveMetadata; public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, String partitionPath, String fileId, TaskContextSupplier taskContextSupplier) { @@ -69,9 +69,9 @@ public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTa public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, String partitionPath, String fileId, TaskContextSupplier taskContextSupplier, - boolean preserveHoodieMetadata) { + boolean preserveMetadata) { this(config, instantTime, hoodieTable, partitionPath, fileId, Option.empty(), - taskContextSupplier, preserveHoodieMetadata); + taskContextSupplier, preserveMetadata); } public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, @@ -82,10 +82,10 @@ public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTa public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, String partitionPath, String fileId, Option overriddenSchema, - TaskContextSupplier taskContextSupplier, boolean preserveHoodieMetadata) { + TaskContextSupplier taskContextSupplier, boolean preserveMetadata) { super(config, instantTime, partitionPath, fileId, hoodieTable, overriddenSchema, taskContextSupplier); - this.preserveHoodieMetadata = preserveHoodieMetadata; + this.preserveMetadata = preserveMetadata; writeStatus.setFileId(fileId); writeStatus.setPartitionPath(partitionPath); writeStatus.setStat(new HoodieWriteStat()); @@ -94,7 +94,8 @@ public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTa try { HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime, - new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath)); + new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), 
partitionPath), + hoodieTable.getPartitionMetafileFormat()); partitionMetadata.trySave(getPartitionId()); createMarkerFile(partitionPath, FSUtils.makeDataFileName(this.instantTime, this.writeToken, this.fileId, hoodieTable.getBaseFileExtension())); this.fileWriter = HoodieFileWriterFactory.getFileWriter(instantTime, path, hoodieTable, config, @@ -111,7 +112,7 @@ public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTa public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, String partitionPath, String fileId, Map> recordMap, TaskContextSupplier taskContextSupplier) { - this(config, instantTime, hoodieTable, partitionPath, fileId, taskContextSupplier); + this(config, instantTime, hoodieTable, partitionPath, fileId, taskContextSupplier, config.isPreserveHoodieCommitMetadataForCompaction()); this.recordMap = recordMap; this.useWriterSchema = true; } @@ -137,13 +138,11 @@ public void write(HoodieRecord record, Option avroRecord) { return; } // Convert GenericRecord to GenericRecord with hoodie commit metadata in schema - IndexedRecord recordWithMetadataInSchema = rewriteRecord((GenericRecord) avroRecord.get()); - if (preserveHoodieMetadata) { - // do not preserve FILENAME_METADATA_FIELD - recordWithMetadataInSchema.put(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.FILENAME_METADATA_FIELD), path.getName()); - fileWriter.writeAvro(record.getRecordKey(), recordWithMetadataInSchema); + if (preserveMetadata) { + fileWriter.writeAvro(record.getRecordKey(), + rewriteRecordWithMetadata((GenericRecord) avroRecord.get(), path.getName())); } else { - fileWriter.writeAvroWithMetadata(recordWithMetadataInSchema, record); + fileWriter.writeAvroWithMetadata(rewriteRecord((GenericRecord) avroRecord.get()), record); } // update the new location of record, so we know where to find it next record.unseal(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java index bad822c8dbdbc..36ee7d96709ab 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java @@ -37,6 +37,9 @@ import java.util.ArrayList; import java.util.List; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; +import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS; + /** * Takes a bunch of keys and returns ones that are present in the file group. */ @@ -60,7 +63,9 @@ private BloomFilter getBloomFilter() { BloomFilter bloomFilter = null; HoodieTimer timer = new HoodieTimer().startTimer(); try { - if (config.isMetadataBloomFilterIndexEnabled()) { + if (config.getBloomIndexUseMetadata() + && getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig()) + .contains(BLOOM_FILTERS.getPartitionPath())) { bloomFilter = hoodieTable.getMetadataTable().getBloomFilter(partitionPathFileIDPair.getLeft(), partitionPathFileIDPair.getRight()) .orElseThrow(() -> new HoodieIndexException("BloomFilter missing for " + partitionPathFileIDPair.getRight())); } else { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java index d38f66a86f912..06e752f59daea 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java @@ -61,8 +61,6 @@ import java.util.Map; import java.util.Set; -import static org.apache.hudi.common.model.HoodieRecord.FILENAME_METADATA_FIELD_POS; - @SuppressWarnings("Duplicates") /** * Handle to merge incoming records to those in storage. 
@@ -107,7 +105,7 @@ public class HoodieMergeHandle extends H protected long recordsDeleted = 0; protected long updatedRecordsWritten = 0; protected long insertRecordsWritten = 0; - protected boolean useWriterSchema; + protected boolean useWriterSchemaForCompaction; protected Option keyGeneratorOpt; private HoodieBaseFile baseFileToMerge; @@ -135,7 +133,7 @@ public HoodieMergeHandle(HoodieWriteConfig config, String instantTime, HoodieTab HoodieBaseFile dataFileToBeMerged, TaskContextSupplier taskContextSupplier, Option keyGeneratorOpt) { super(config, instantTime, partitionPath, fileId, hoodieTable, taskContextSupplier); this.keyToNewRecords = keyToNewRecords; - this.useWriterSchema = true; + this.useWriterSchemaForCompaction = true; this.preserveMetadata = config.isPreserveHoodieCommitMetadataForCompaction(); init(fileId, this.partitionPath, dataFileToBeMerged); validateAndSetAndKeyGenProps(keyGeneratorOpt, config.populateMetaFields()); @@ -168,7 +166,8 @@ private void init(String fileId, String partitionPath, HoodieBaseFile baseFileTo writeStatus.getStat().setPrevCommit(FSUtils.getCommitTime(latestValidFilePath)); HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime, - new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath)); + new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath), + hoodieTable.getPartitionMetafileFormat()); partitionMetadata.trySave(getPartitionId()); String newFileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId, hoodieTable.getBaseFileExtension()); @@ -248,9 +247,9 @@ protected void init(String fileId, Iterator> newRecordsItr) { } LOG.info("Number of entries in MemoryBasedMap => " + ((ExternalSpillableMap) keyToNewRecords).getInMemoryMapNumEntries() - + "Total size in bytes of MemoryBasedMap => " - + ((ExternalSpillableMap) keyToNewRecords).getCurrentInMemoryMapSize() + "Number of entries in BitCaskDiskMap => " - + 
((ExternalSpillableMap) keyToNewRecords).getDiskBasedMapNumEntries() + "Size of file spilled to disk => " + + ", Total size in bytes of MemoryBasedMap => " + + ((ExternalSpillableMap) keyToNewRecords).getCurrentInMemoryMapSize() + ", Number of entries in BitCaskDiskMap => " + + ((ExternalSpillableMap) keyToNewRecords).getDiskBasedMapNumEntries() + ", Size of file spilled to disk => " + ((ExternalSpillableMap) keyToNewRecords).getSizeOfFileOnDiskInBytes()); } @@ -264,26 +263,26 @@ private boolean writeUpdateRecord(HoodieRecord hoodieRecord, GenericRecord ol isDelete = HoodieOperation.isDelete(hoodieRecord.getOperation()); } } - return writeRecord(hoodieRecord, indexedRecord, isDelete, oldRecord); + return writeRecord(hoodieRecord, indexedRecord, isDelete); } protected void writeInsertRecord(HoodieRecord hoodieRecord) throws IOException { - Schema schema = useWriterSchema ? tableSchemaWithMetaFields : tableSchema; + Schema schema = useWriterSchemaForCompaction ? tableSchemaWithMetaFields : tableSchema; Option insertRecord = hoodieRecord.getData().getInsertValue(schema, config.getProps()); // just skip the ignored record if (insertRecord.isPresent() && insertRecord.get().equals(IGNORE_RECORD)) { return; } - if (writeRecord(hoodieRecord, insertRecord, HoodieOperation.isDelete(hoodieRecord.getOperation()), null)) { + if (writeRecord(hoodieRecord, insertRecord, HoodieOperation.isDelete(hoodieRecord.getOperation()))) { insertRecordsWritten++; } } protected boolean writeRecord(HoodieRecord hoodieRecord, Option indexedRecord) { - return writeRecord(hoodieRecord, indexedRecord, false, null); + return writeRecord(hoodieRecord, indexedRecord, false); } - protected boolean writeRecord(HoodieRecord hoodieRecord, Option indexedRecord, boolean isDelete, GenericRecord oldRecord) { + protected boolean writeRecord(HoodieRecord hoodieRecord, Option indexedRecord, boolean isDelete) { Option recordMetadata = hoodieRecord.getData().getMetadata(); if 
(!partitionPath.equals(hoodieRecord.getPartitionPath())) { HoodieUpsertException failureEx = new HoodieUpsertException("mismatched partition path, record partition: " @@ -294,13 +293,11 @@ protected boolean writeRecord(HoodieRecord hoodieRecord, Option combinedAvroRecord = hoodieRecord.getData().combineAndGetUpdateValue(oldRecord, - useWriterSchema ? tableSchemaWithMetaFields : tableSchema, + useWriterSchemaForCompaction ? tableSchemaWithMetaFields : tableSchema, config.getPayloadConfig().getProps()); if (combinedAvroRecord.isPresent() && combinedAvroRecord.get().equals(IGNORE_RECORD)) { @@ -341,8 +338,7 @@ public void write(GenericRecord oldRecord) { } else if (writeUpdateRecord(hoodieRecord, oldRecord, combinedAvroRecord)) { /* * ONLY WHEN 1) we have an update for this key AND 2) We are able to successfully - * write the the combined new - * value + * write the combined new value * * We no longer need to copy the old record over. */ @@ -358,6 +354,11 @@ public void write(GenericRecord oldRecord) { if (copyOldRecord) { // this should work as it is, since this is an existing record try { + // rewrite file names + // do not preserve FILENAME_METADATA_FIELD + if (preserveMetadata && useWriterSchemaForCompaction) { + oldRecord.put(HoodieRecord.FILENAME_METADATA_FIELD_POS, newFilePath.getName()); + } fileWriter.writeAvro(key, oldRecord); } catch (IOException | RuntimeException e) { String errMsg = String.format("Failed to merge old record into new file for key %s from old file %s to new file %s with writerSchema %s", diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java index 897491b906aae..931b08c2fe0c2 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java @@ -90,7 +90,7 @@ 
public void write(GenericRecord oldRecord) { throw new HoodieUpsertException("Insert/Update not in sorted order"); } try { - if (useWriterSchema) { + if (useWriterSchemaForCompaction) { writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchemaWithMetaFields, config.getProps())); } else { writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchema, config.getProps())); @@ -113,7 +113,7 @@ public List close() { String key = newRecordKeysSorted.poll(); HoodieRecord hoodieRecord = keyToNewRecords.get(key); if (!writtenRecordKeys.contains(hoodieRecord.getRecordKey())) { - if (useWriterSchema) { + if (useWriterSchemaForCompaction) { writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchemaWithMetaFields, config.getProps())); } else { writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchema, config.getProps())); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java index b7e2d6af5a0fb..89babc7725d6e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java @@ -227,8 +227,8 @@ protected GenericRecord rewriteRecord(GenericRecord record) { return HoodieAvroUtils.rewriteRecord(record, writeSchemaWithMetaFields); } - protected GenericRecord rewriteRecord(GenericRecord record, boolean copyOverMetaFields, GenericRecord fallbackRecord) { - return HoodieAvroUtils.rewriteRecord(record, writeSchemaWithMetaFields, copyOverMetaFields, fallbackRecord); + protected GenericRecord rewriteRecordWithMetadata(GenericRecord record, String fileName) { + return HoodieAvroUtils.rewriteRecordWithMetadata(record, writeSchemaWithMetaFields, fileName); } public abstract List close(); diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java index a5792349cad16..9f749566b255b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java @@ -37,8 +37,6 @@ public interface HoodieFileWriter { void writeAvro(String key, R oldRecord) throws IOException; - long getBytesWritten(); - default void prepRecordWithMetadata(R avroRecord, HoodieRecord record, String instantTime, Integer partitionId, AtomicLong recordIndex, String fileName) { String seqId = HoodieRecord.generateSequenceId(instantTime, partitionId, recordIndex.getAndIncrement()); HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, record.getRecordKey(), record.getPartitionPath(), fileName); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index 38db1cde41226..7d0c307dbfe53 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -30,6 +30,7 @@ import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.parquet.avro.AvroSchemaConverter; @@ -53,10 +54,12 @@ public static return newParquetFileWriter(instantTime, path, config, schema, hoodieTable, taskContextSupplier, config.populateMetaFields()); } if (HFILE.getFileExtension().equals(extension)) { - return newHFileFileWriter(instantTime, path, config, schema, hoodieTable, taskContextSupplier); + return 
newHFileFileWriter( + instantTime, path, config, schema, hoodieTable.getHadoopConf(), taskContextSupplier); } if (ORC.getFileExtension().equals(extension)) { - return newOrcFileWriter(instantTime, path, config, schema, hoodieTable, taskContextSupplier); + return newOrcFileWriter( + instantTime, path, config, schema, hoodieTable.getHadoopConf(), taskContextSupplier); } throw new UnsupportedOperationException(extension + " format not supported yet."); } @@ -64,28 +67,29 @@ public static private static HoodieFileWriter newParquetFileWriter( String instantTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable, TaskContextSupplier taskContextSupplier, boolean populateMetaFields) throws IOException { - return newParquetFileWriter(instantTime, path, config, schema, hoodieTable, taskContextSupplier, populateMetaFields, populateMetaFields); + return newParquetFileWriter(instantTime, path, config, schema, hoodieTable.getHadoopConf(), + taskContextSupplier, populateMetaFields, populateMetaFields); } private static HoodieFileWriter newParquetFileWriter( - String instantTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable, + String instantTime, Path path, HoodieWriteConfig config, Schema schema, Configuration conf, TaskContextSupplier taskContextSupplier, boolean populateMetaFields, boolean enableBloomFilter) throws IOException { Option filter = enableBloomFilter ? 
Option.of(createBloomFilter(config)) : Option.empty(); - HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter(hoodieTable.getHadoopConf()).convert(schema), schema, filter); + HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter(conf).convert(schema), schema, filter); HoodieAvroParquetConfig parquetConfig = new HoodieAvroParquetConfig(writeSupport, config.getParquetCompressionCodec(), config.getParquetBlockSize(), config.getParquetPageSize(), config.getParquetMaxFileSize(), - hoodieTable.getHadoopConf(), config.getParquetCompressionRatio(), config.parquetDictionaryEnabled()); + conf, config.getParquetCompressionRatio(), config.parquetDictionaryEnabled()); return new HoodieParquetWriter<>(instantTime, path, parquetConfig, schema, taskContextSupplier, populateMetaFields); } - private static HoodieFileWriter newHFileFileWriter( - String instantTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable, + static HoodieFileWriter newHFileFileWriter( + String instantTime, Path path, HoodieWriteConfig config, Schema schema, Configuration conf, TaskContextSupplier taskContextSupplier) throws IOException { BloomFilter filter = createBloomFilter(config); - HoodieHFileConfig hfileConfig = new HoodieHFileConfig(hoodieTable.getHadoopConf(), + HoodieHFileConfig hfileConfig = new HoodieHFileConfig(conf, config.getHFileCompressionAlgorithm(), config.getHFileBlockSize(), config.getHFileMaxFileSize(), HoodieHFileReader.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR); @@ -94,10 +98,10 @@ private static HoodieFi } private static HoodieFileWriter newOrcFileWriter( - String instantTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable, + String instantTime, Path path, HoodieWriteConfig config, Schema schema, Configuration conf, TaskContextSupplier taskContextSupplier) throws IOException { BloomFilter filter = 
createBloomFilter(config); - HoodieOrcConfig orcConfig = new HoodieOrcConfig(hoodieTable.getHadoopConf(), config.getOrcCompressionCodec(), + HoodieOrcConfig orcConfig = new HoodieOrcConfig(conf, config.getOrcCompressionCodec(), config.getOrcStripeSize(), config.getOrcBlockSize(), config.getOrcMaxFileSize(), filter); return new HoodieOrcWriter<>(instantTime, path, orcConfig, schema, taskContextSupplier); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java index 1079566b782f1..5ce377901a4ba 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java @@ -21,14 +21,14 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.CacheConfig; public class HoodieHFileConfig { - public static final KeyValue.KVComparator HFILE_COMPARATOR = new HoodieHBaseKVComparator(); + public static final CellComparator HFILE_COMPARATOR = new HoodieHBaseKVComparator(); public static final boolean PREFETCH_ON_OPEN = CacheConfig.DEFAULT_PREFETCH_ON_OPEN; public static final boolean CACHE_DATA_IN_L1 = HColumnDescriptor.DEFAULT_CACHE_DATA_IN_L1; // This is private in CacheConfig so have been copied here. 
@@ -42,12 +42,12 @@ public class HoodieHFileConfig { private final boolean dropBehindCacheCompaction; private final Configuration hadoopConf; private final BloomFilter bloomFilter; - private final KeyValue.KVComparator hfileComparator; + private final CellComparator hfileComparator; private final String keyFieldName; public HoodieHFileConfig(Configuration hadoopConf, Compression.Algorithm compressionAlgorithm, int blockSize, long maxFileSize, String keyFieldName, boolean prefetchBlocksOnOpen, boolean cacheDataInL1, - boolean dropBehindCacheCompaction, BloomFilter bloomFilter, KeyValue.KVComparator hfileComparator) { + boolean dropBehindCacheCompaction, BloomFilter bloomFilter, CellComparator hfileComparator) { this.hadoopConf = hadoopConf; this.compressionAlgorithm = compressionAlgorithm; this.blockSize = blockSize; @@ -96,7 +96,7 @@ public BloomFilter getBloomFilter() { return bloomFilter; } - public KeyValue.KVComparator getHfileComparator() { + public CellComparator getHFileComparator() { return hfileComparator; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java index 2ad6d7f9220b0..1642eb2c42fc6 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java @@ -25,6 +25,8 @@ import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; @@ -38,8 +40,6 @@ import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; import 
org.apache.hadoop.io.Writable; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.StringUtils; import java.io.DataInput; import java.io.DataOutput; @@ -95,6 +95,7 @@ public HoodieHFileWriter(String instantTime, Path file, HoodieHFileConfig hfileC HFileContext context = new HFileContextBuilder().withBlockSize(hfileConfig.getBlockSize()) .withCompression(hfileConfig.getCompressionAlgorithm()) + .withCellComparator(hfileConfig.getHFileComparator()) .build(); conf.set(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, String.valueOf(hfileConfig.shouldPrefetchBlocksOnOpen())); @@ -104,10 +105,9 @@ public HoodieHFileWriter(String instantTime, Path file, HoodieHFileConfig hfileC this.writer = HFile.getWriterFactory(conf, cacheConfig) .withPath(this.fs, this.file) .withFileContext(context) - .withComparator(hfileConfig.getHfileComparator()) .create(); - writer.appendFileInfo(HoodieHFileReader.KEY_SCHEMA.getBytes(), schema.toString().getBytes()); + writer.appendFileInfo(HoodieHFileReader.SCHEMA_KEY.getBytes(), schema.toString().getBytes()); } @Override @@ -187,9 +187,4 @@ public void readFields(DataInput in) throws IOException { writer.close(); writer = null; } - - @Override - public long getBytesWritten() { - return fs.getBytesWritten(file); - } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java index 85d36cc685cd4..3fe8be05c09f0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java @@ -165,9 +165,4 @@ public void close() throws IOException { writer.close(); } - - @Override - public long getBytesWritten() { - return fs.getBytesWritten(file); - } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java index 3cee8c816d41f..957a0ff52e91d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java @@ -97,7 +97,7 @@ public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOEx @Override public boolean canWrite() { - return fs.getBytesWritten(file) < maxFileSize; + return getDataSize() < maxFileSize; } @Override @@ -107,9 +107,4 @@ public void writeAvro(String key, IndexedRecord object) throws IOException { writeSupport.add(key); } } - - @Override - public long getBytesWritten() { - return fs.getBytesWritten(file); - } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index 2f4bca81b18dc..4faac22a841fe 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -18,7 +18,13 @@ package org.apache.hudi.metadata; +import org.apache.avro.specific.SpecificRecordBase; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.model.HoodieCleanMetadata; +import org.apache.hudi.avro.model.HoodieIndexPartitionInfo; import org.apache.hudi.avro.model.HoodieInstantInfo; import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.avro.model.HoodieRestoreMetadata; @@ -30,12 +36,12 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import 
org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.DeleteRecord; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieFileFormat; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.model.HoodieRecord; @@ -62,13 +68,8 @@ import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig; import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIndexException; import org.apache.hudi.exception.HoodieMetadataException; - -import org.apache.avro.specific.SpecificRecordBase; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -80,14 +81,19 @@ import java.util.HashMap; import java.util.LinkedList; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Properties; -import java.util.concurrent.atomic.AtomicLong; +import java.util.Set; import java.util.stream.Collectors; import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER; +import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING; import static org.apache.hudi.metadata.HoodieTableMetadata.METADATA_TABLE_NAME_SUFFIX; import static org.apache.hudi.metadata.HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions; +import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions; /** * Writer implementation backed by an internal hudi table. Partition and file listing are saved within an internal MOR table @@ -113,7 +119,6 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta protected boolean enabled; protected SerializableConfiguration hadoopConf; protected final transient HoodieEngineContext engineContext; - // TODO: HUDI-3258 Support secondary key via multiple partitions within a single type protected final List enabledPartitionTypes; /** @@ -259,7 +264,11 @@ private HoodieWriteConfig createMetadataWriteConfig(HoodieWriteConfig writeConfi .withInlineCompaction(false) .withMaxNumDeltaCommitsBeforeCompaction(writeConfig.getMetadataCompactDeltaCommitMax()) // we will trigger archive manually, to ensure only regular writer invokes it - .withAutoArchive(false).build()) + .withAutoArchive(false) + // by default, the HFile does not keep the metadata fields, set up as false + // to always use the metadata of the new record. 
+ .withPreserveCommitMetadata(false) + .build()) .withParallelism(parallelism, parallelism) .withDeleteParallelism(parallelism) .withRollbackParallelism(parallelism) @@ -359,6 +368,36 @@ protected void initializeIfNeeded(HoodieTableMeta Option inflightInstantTimestamp) throws IOException { HoodieTimer timer = new HoodieTimer().startTimer(); + boolean exists = metadataTableExists(dataMetaClient, actionMetadata); + + if (!exists) { + // Initialize for the first time by listing partitions and files directly from the file system + if (initializeFromFilesystem(dataMetaClient, inflightInstantTimestamp)) { + metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.INITIALIZE_STR, timer.endTimer())); + } + return; + } + + // if metadata table exists, then check if any of the enabled partition types needs to be initialized + Set inflightAndCompletedPartitions = getInflightAndCompletedMetadataPartitions(dataMetaClient.getTableConfig()); + List partitionsToInit = this.enabledPartitionTypes.stream() + .filter(p -> !inflightAndCompletedPartitions.contains(p.getPartitionPath()) && !MetadataPartitionType.FILES.equals(p)) + .collect(Collectors.toList()); + + // if there are no partitions to initialize or there is a pending operation, then don't initialize in this round + if (partitionsToInit.isEmpty() || anyPendingDataInstant(dataMetaClient, inflightInstantTimestamp)) { + return; + } + + String createInstantTime = getInitialCommitInstantTime(dataMetaClient); + initTableMetadata(); // re-init certain flags in BaseTableMetadata + initializeEnabledFileGroups(dataMetaClient, createInstantTime, partitionsToInit); + initialCommit(createInstantTime, partitionsToInit); + updateInitializedPartitionsInTableConfig(partitionsToInit); + } + + private boolean metadataTableExists(HoodieTableMetaClient dataMetaClient, + Option actionMetadata) throws IOException { boolean exists = dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), HoodieTableMetaClient.METAFOLDER_NAME)); 
boolean reInitialize = false; @@ -387,12 +426,7 @@ protected void initializeIfNeeded(HoodieTableMeta exists = false; } - if (!exists) { - // Initialize for the first time by listing partitions and files directly from the file system - if (initializeFromFilesystem(dataMetaClient, inflightInstantTimestamp)) { - metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.INITIALIZE_STR, timer.endTimer())); - } - } + return exists; } /** @@ -447,7 +481,7 @@ private boolean isCommitRevertedByInFlightAction( final String INSTANT_ACTION = (actionMetadata.get() instanceof HoodieRollbackMetadata ? HoodieTimeline.ROLLBACK_ACTION - : (actionMetadata.get() instanceof HoodieRestoreMetadata ? HoodieTimeline.RESTORE_ACTION : "")); + : (actionMetadata.get() instanceof HoodieRestoreMetadata ? HoodieTimeline.RESTORE_ACTION : EMPTY_STRING)); List affectedInstantTimestamps; switch (INSTANT_ACTION) { @@ -484,6 +518,39 @@ private boolean isCommitRevertedByInFlightAction( */ private boolean initializeFromFilesystem(HoodieTableMetaClient dataMetaClient, Option inflightInstantTimestamp) throws IOException { + if (anyPendingDataInstant(dataMetaClient, inflightInstantTimestamp)) { + return false; + } + + String createInstantTime = getInitialCommitInstantTime(dataMetaClient); + + initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields()); + initTableMetadata(); + // if async metadata indexing is enabled, + // then only initialize files partition as other partitions will be built using HoodieIndexer + List enabledPartitionTypes = new ArrayList<>(); + if (dataWriteConfig.isMetadataAsyncIndex()) { + enabledPartitionTypes.add(MetadataPartitionType.FILES); + } else { + // all enabled ones should be initialized + enabledPartitionTypes = this.enabledPartitionTypes; + } + initializeEnabledFileGroups(dataMetaClient, createInstantTime, enabledPartitionTypes); + initialCommit(createInstantTime, enabledPartitionTypes); + 
updateInitializedPartitionsInTableConfig(enabledPartitionTypes); + return true; + } + + private String getInitialCommitInstantTime(HoodieTableMetaClient dataMetaClient) { + // If there is no commit on the dataset yet, use the SOLO_COMMIT_TIMESTAMP as the instant time for initial commit + // Otherwise, we use the timestamp of the latest completed action. + String createInstantTime = dataMetaClient.getActiveTimeline().filterCompletedInstants() + .getReverseOrderedInstants().findFirst().map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP); + LOG.info("Creating a new metadata table in " + metadataWriteConfig.getBasePath() + " at instant " + createInstantTime); + return createInstantTime; + } + + private boolean anyPendingDataInstant(HoodieTableMetaClient dataMetaClient, Option inflightInstantTimestamp) { ValidationUtils.checkState(enabled, "Metadata table cannot be initialized as it is not enabled"); // We can only initialize if there are no pending operations on the dataset @@ -496,27 +563,19 @@ private boolean initializeFromFilesystem(HoodieTableMetaClient dataMetaClient, metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BOOTSTRAP_ERR_STR, 1)); LOG.warn("Cannot initialize metadata table as operation(s) are in progress on the dataset: " + Arrays.toString(pendingDataInstant.toArray())); - return false; + return true; } + return false; + } - // If there is no commit on the dataset yet, use the SOLO_COMMIT_TIMESTAMP as the instant time for initial commit - // Otherwise, we use the timestamp of the latest completed action. 
- String createInstantTime = dataMetaClient.getActiveTimeline().filterCompletedInstants() - .getReverseOrderedInstants().findFirst().map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP); - LOG.info("Creating a new metadata table in " + metadataWriteConfig.getBasePath() + " at instant " + createInstantTime); - - initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields()); - initTableMetadata(); - initializeEnabledFileGroups(dataMetaClient, createInstantTime); - - // During cold startup, the list of files to be committed can be huge. So creating a HoodieCommitMetadata out - // of these large number of files and calling the existing update(HoodieCommitMetadata) function does not scale - // well. Hence, we have a special commit just for the initialization scenario. - initialCommit(createInstantTime); - return true; + private void updateInitializedPartitionsInTableConfig(List partitionTypes) { + Set completedPartitions = getCompletedMetadataPartitions(dataMetaClient.getTableConfig()); + completedPartitions.addAll(partitionTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet())); + dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS.key(), String.join(",", completedPartitions)); + HoodieTableConfig.update(dataMetaClient.getFs(), new Path(dataMetaClient.getMetaPath()), dataMetaClient.getTableConfig().getProps()); } - private HoodieTableMetaClient initializeMetaClient(boolean populatMetaFields) throws IOException { + private HoodieTableMetaClient initializeMetaClient(boolean populateMetaFields) throws IOException { return HoodieTableMetaClient.withPropertyBuilder() .setTableType(HoodieTableType.MERGE_ON_READ) .setTableName(tableName) @@ -524,7 +583,7 @@ private HoodieTableMetaClient initializeMetaClient(boolean populatMetaFields) th .setPayloadClassName(HoodieMetadataPayload.class.getName()) .setBaseFileFormat(HoodieFileFormat.HFILE.toString()) 
.setRecordKeyFields(RECORD_KEY_FIELD_NAME) - .setPopulateMetaFields(populatMetaFields) + .setPopulateMetaFields(populateMetaFields) .setKeyGeneratorClassProp(HoodieTableMetadataKeyGenerator.class.getCanonicalName()) .initTable(hadoopConf.get(), metadataWriteConfig.getBasePath()); } @@ -549,7 +608,7 @@ private List listAllPartitions(HoodieTableMetaClient datasetMetaC // In each round we will list a section of directories int numDirsToList = Math.min(fileListingParallelism, pathsToList.size()); // List all directories in parallel - List processedDirectories = engineContext.map(pathsToList.subList(0, numDirsToList), path -> { + List processedDirectories = engineContext.map(pathsToList.subList(0, numDirsToList), path -> { FileSystem fs = path.getFileSystem(conf.get()); String relativeDirPath = FSUtils.getRelativePartitionPath(new Path(datasetBasePath), path); return new DirectoryInfo(relativeDirPath, fs.listStatus(path)); @@ -591,13 +650,19 @@ private List listAllPartitions(HoodieTableMetaClient datasetMetaC * @param createInstantTime - Metadata table create instant time * @throws IOException */ - private void initializeEnabledFileGroups(HoodieTableMetaClient dataMetaClient, String createInstantTime) throws IOException { - for (MetadataPartitionType enabledPartitionType : this.enabledPartitionTypes) { + private void initializeEnabledFileGroups(HoodieTableMetaClient dataMetaClient, String createInstantTime, List partitionTypes) throws IOException { + for (MetadataPartitionType enabledPartitionType : partitionTypes) { initializeFileGroups(dataMetaClient, enabledPartitionType, createInstantTime, enabledPartitionType.getFileGroupCount()); } } + public void initializeMetadataPartitions(HoodieTableMetaClient dataMetaClient, List metadataPartitions, String instantTime) throws IOException { + for (MetadataPartitionType partitionType : metadataPartitions) { + initializeFileGroups(dataMetaClient, partitionType, instantTime, partitionType.getFileGroupCount()); + } + } + /** * 
Initialize file groups for a partition. For file listing, we just have one file group. * @@ -610,13 +675,12 @@ private void initializeEnabledFileGroups(HoodieTableMetaClient dataMetaClient, S */ private void initializeFileGroups(HoodieTableMetaClient dataMetaClient, MetadataPartitionType metadataPartition, String instantTime, int fileGroupCount) throws IOException { - final HashMap blockHeader = new HashMap<>(); blockHeader.put(HeaderMetadataType.INSTANT_TIME, instantTime); // Archival of data table has a dependency on compaction(base files) in metadata table. // It is assumed that as of time Tx of base instant (/compaction time) in metadata table, // all commits in data table is in sync with metadata table. So, we always start with log file for any fileGroup. - final HoodieDeleteBlock block = new HoodieDeleteBlock(new HoodieKey[0], blockHeader); + final HoodieDeleteBlock block = new HoodieDeleteBlock(new DeleteRecord[0], blockHeader); LOG.info(String.format("Creating %d file groups for partition %s with base fileId %s at instant time %s", fileGroupCount, metadataPartition.getPartitionPath(), metadataPartition.getFileIdPrefix(), instantTime)); @@ -641,12 +705,36 @@ private void initializeFileGroups(HoodieTableMetaClient dataMetaClient, Metadata } } + public void dropMetadataPartitions(List metadataPartitions) throws IOException { + Set completedIndexes = getCompletedMetadataPartitions(dataMetaClient.getTableConfig()); + Set inflightIndexes = getInflightMetadataPartitions(dataMetaClient.getTableConfig()); + + for (MetadataPartitionType partitionType : metadataPartitions) { + String partitionPath = partitionType.getPartitionPath(); + // first update table config + if (inflightIndexes.contains(partitionPath)) { + inflightIndexes.remove(partitionPath); + dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS_INFLIGHT.key(), String.join(",", inflightIndexes)); + } else if (completedIndexes.contains(partitionPath)) { + 
completedIndexes.remove(partitionPath); + dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS.key(), String.join(",", completedIndexes)); + } + HoodieTableConfig.update(dataMetaClient.getFs(), new Path(dataMetaClient.getMetaPath()), dataMetaClient.getTableConfig().getProps()); + LOG.warn("Deleting Metadata Table partitions: " + partitionPath); + dataMetaClient.getFs().delete(new Path(metadataWriteConfig.getBasePath(), partitionPath), true); + } + } + private MetadataRecordsGenerationParams getRecordsGenerationParams() { return new MetadataRecordsGenerationParams( - dataMetaClient, enabledPartitionTypes, dataWriteConfig.getBloomFilterType(), - dataWriteConfig.getBloomIndexParallelism(), - dataWriteConfig.isMetadataIndexColumnStatsForAllColumnsEnabled(), - dataWriteConfig.getColumnStatsIndexParallelism()); + dataMetaClient, + enabledPartitionTypes, + dataWriteConfig.getBloomFilterType(), + dataWriteConfig.getMetadataBloomFilterIndexParallelism(), + dataWriteConfig.isMetadataColumnStatsIndexEnabled(), + dataWriteConfig.getColumnStatsIndexParallelism(), + dataWriteConfig.getColumnsEnabledForColumnStatsIndex(), + dataWriteConfig.getColumnsEnabledForBloomFilterIndex()); } /** @@ -659,20 +747,82 @@ private interface ConvertMetadataFunction { /** * Processes commit metadata from data table and commits to metadata table. + * * @param instantTime instant time of interest. * @param convertMetadataFunction converter function to convert the respective metadata to List of HoodieRecords to be written to metadata table. * @param type of commit metadata. * @param canTriggerTableService true if table services can be triggered. false otherwise. 
*/ private void processAndCommit(String instantTime, ConvertMetadataFunction convertMetadataFunction, boolean canTriggerTableService) { + if (!dataWriteConfig.isMetadataTableEnabled()) { + return; + } + Set partitionsToUpdate = getMetadataPartitionsToUpdate(); + Set inflightIndexes = getInflightMetadataPartitions(dataMetaClient.getTableConfig()); + // if indexing is inflight then do not trigger table service + boolean doNotTriggerTableService = partitionsToUpdate.stream().anyMatch(inflightIndexes::contains); + if (enabled && metadata != null) { - Map> partitionRecordsMap = convertMetadataFunction.convertMetadata(); - commit(instantTime, partitionRecordsMap, canTriggerTableService); + // convert metadata and filter only the entries whose partition path are in partitionsToUpdate + Map> partitionRecordsMap = convertMetadataFunction.convertMetadata().entrySet().stream() + .filter(entry -> partitionsToUpdate.contains(entry.getKey().getPartitionPath())).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + commit(instantTime, partitionRecordsMap, !doNotTriggerTableService && canTriggerTableService); + } + } + + private Set getMetadataPartitionsToUpdate() { + // fetch partitions to update from table config + Set partitionsToUpdate = getCompletedMetadataPartitions(dataMetaClient.getTableConfig()); + // add inflight indexes as well because the file groups have already been initialized, so writers can log updates + partitionsToUpdate.addAll(getInflightMetadataPartitions(dataMetaClient.getTableConfig())); + if (!partitionsToUpdate.isEmpty()) { + return partitionsToUpdate; } + // fallback to all enabled partitions if table config returned no partitions + return getEnabledPartitionTypes().stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet()); + } + + @Override + public void buildMetadataPartitions(HoodieEngineContext engineContext, List indexPartitionInfos) { + if (indexPartitionInfos.isEmpty()) { + LOG.warn("No partition to index in 
the plan"); + return; + } + String indexUptoInstantTime = indexPartitionInfos.get(0).getIndexUptoInstant(); + List partitionTypes = new ArrayList<>(); + indexPartitionInfos.forEach(indexPartitionInfo -> { + String relativePartitionPath = indexPartitionInfo.getMetadataPartitionPath(); + LOG.info(String.format("Creating a new metadata index for partition '%s' under path %s upto instant %s", + relativePartitionPath, metadataWriteConfig.getBasePath(), indexUptoInstantTime)); + try { + // file group should have already been initialized while scheduling index for this partition + if (!dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), relativePartitionPath))) { + throw new HoodieIndexException(String.format("File group not initialized for metadata partition: %s, indexUptoInstant: %s. Looks like index scheduling failed!", + relativePartitionPath, indexUptoInstantTime)); + } + } catch (IOException e) { + throw new HoodieIndexException(String.format("Unable to check whether file group is initialized for metadata partition: %s, indexUptoInstant: %s", + relativePartitionPath, indexUptoInstantTime)); + } + + // return early and populate enabledPartitionTypes correctly (check in initialCommit) + MetadataPartitionType partitionType = MetadataPartitionType.valueOf(relativePartitionPath.toUpperCase(Locale.ROOT)); + if (!enabledPartitionTypes.contains(partitionType)) { + throw new HoodieIndexException(String.format("Indexing for metadata partition: %s is not enabled", partitionType)); + } + partitionTypes.add(partitionType); + }); + // before initial commit update inflight indexes in table config + Set inflightIndexes = getInflightMetadataPartitions(dataMetaClient.getTableConfig()); + inflightIndexes.addAll(indexPartitionInfos.stream().map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toSet())); + dataMetaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS_INFLIGHT.key(), String.join(",", inflightIndexes)); + 
HoodieTableConfig.update(dataMetaClient.getFs(), new Path(dataMetaClient.getMetaPath()), dataMetaClient.getTableConfig().getProps()); + initialCommit(indexUptoInstantTime, partitionTypes); } /** * Update from {@code HoodieCommitMetadata}. + * * @param commitMetadata {@code HoodieCommitMetadata} * @param instantTime Timestamp at which the commit was performed * @param isTableServiceAction {@code true} if commit metadata is pertaining to a table service. {@code false} otherwise. @@ -772,12 +922,18 @@ protected HoodieData prepRecords(Map fileSlices = HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, Option.ofNullable(fsView), partitionName); + if (fileSlices.isEmpty()) { + // scheduling of INDEX only initializes the file group and not add commit + // so if there are no committed file slices, look for inflight slices + fileSlices = HoodieTableMetadataUtil.getPartitionLatestFileSlicesIncludingInflight(metadataMetaClient, Option.ofNullable(fsView), partitionName); + } ValidationUtils.checkArgument(fileSlices.size() == fileGroupCount, String.format("Invalid number of file groups for partition:%s, found=%d, required=%d", partitionName, fileSlices.size(), fileGroupCount)); + List finalFileSlices = fileSlices; HoodieData rddSinglePartitionRecords = records.map(r -> { - FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), + FileSlice slice = finalFileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), fileGroupCount)); r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId())); return r; @@ -843,71 +999,78 @@ protected void cleanIfNecessary(BaseHoodieWriteClient writeClient, String instan } /** - * This is invoked to initialize metadata table for a dataset. Bootstrap Commit has special handling mechanism due to its scale compared to - * other regular commits. + * This is invoked to initialize metadata table for a dataset. 
+ * Initial commit has special handling mechanism due to its scale compared to other regular commits. + * During cold startup, the list of files to be committed can be huge. + * So creating a HoodieCommitMetadata out of these large number of files, + * and calling the existing update(HoodieCommitMetadata) function does not scale well. + * Hence, we have a special commit just for the initialization scenario. */ - private void initialCommit(String createInstantTime) { + private void initialCommit(String createInstantTime, List partitionTypes) { // List all partitions in the basePath of the containing dataset LOG.info("Initializing metadata table by using file listings in " + dataWriteConfig.getBasePath()); engineContext.setJobStatus(this.getClass().getSimpleName(), "Initializing metadata table by listing files and partitions"); - List partitionInfoList = listAllPartitions(dataMetaClient); - List partitions = new ArrayList<>(); - AtomicLong totalFiles = new AtomicLong(0); - Map> partitionToFilesMap = partitionInfoList.stream().map(p -> { - final String partitionName = HoodieTableMetadataUtil.getPartition(p.getRelativePath()); - partitions.add(partitionName); - totalFiles.addAndGet(p.getTotalFiles()); - return Pair.of(partitionName, p.getFileNameToSizeMap()); - }).collect(Collectors.toMap(Pair::getKey, Pair::getValue)); - final Map> partitionToRecordsMap = new HashMap<>(); - - // Record which saves the list of all partitions - HoodieRecord allPartitionRecord = HoodieMetadataPayload.createPartitionListRecord(partitions); - if (partitions.isEmpty()) { - // in case of initializing of a fresh table, there won't be any partitions, but we need to make a boostrap commit - final HoodieData allPartitionRecordsRDD = engineContext.parallelize( - Collections.singletonList(allPartitionRecord), 1); - partitionToRecordsMap.put(MetadataPartitionType.FILES, allPartitionRecordsRDD); - commit(createInstantTime, partitionToRecordsMap, false); - return; - } + Map> partitionToRecordsMap = 
new HashMap<>(); - HoodieData filesPartitionRecords = engineContext.parallelize(Arrays.asList(allPartitionRecord), 1); - if (!partitionInfoList.isEmpty()) { - HoodieData fileListRecords = engineContext.parallelize(partitionInfoList, partitionInfoList.size()).map(partitionInfo -> { - Map fileNameToSizeMap = partitionInfo.getFileNameToSizeMap(); - // filter for files that are part of the completed commits - Map validFileNameToSizeMap = fileNameToSizeMap.entrySet().stream().filter(fileSizePair -> { - String commitTime = FSUtils.getCommitTime(fileSizePair.getKey()); - return HoodieTimeline.compareTimestamps(commitTime, HoodieTimeline.LESSER_THAN_OR_EQUALS, createInstantTime); - }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - - // Record which saves files within a partition - return HoodieMetadataPayload.createPartitionFilesRecord( - HoodieTableMetadataUtil.getPartition(partitionInfo.getRelativePath()), Option.of(validFileNameToSizeMap), Option.empty()); - }); - filesPartitionRecords = filesPartitionRecords.union(fileListRecords); + List partitionInfoList = listAllPartitions(dataMetaClient); + Map> partitionToFilesMap = partitionInfoList.stream() + .map(p -> { + String partitionName = HoodieTableMetadataUtil.getPartitionIdentifier(p.getRelativePath()); + return Pair.of(partitionName, p.getFileNameToSizeMap()); + }) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + + int totalDataFilesCount = partitionToFilesMap.values().stream().mapToInt(Map::size).sum(); + List partitions = new ArrayList<>(partitionToFilesMap.keySet()); + + if (partitionTypes.contains(MetadataPartitionType.FILES)) { + // Record which saves the list of all partitions + HoodieRecord allPartitionRecord = HoodieMetadataPayload.createPartitionListRecord(partitions); + HoodieData filesPartitionRecords = getFilesPartitionRecords(createInstantTime, partitionInfoList, allPartitionRecord); + ValidationUtils.checkState(filesPartitionRecords.count() == (partitions.size() + 1)); 
+ partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecords); } - ValidationUtils.checkState(filesPartitionRecords.count() == (partitions.size() + 1)); - partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecords); - if (enabledPartitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) { + if (partitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS) && totalDataFilesCount > 0) { final HoodieData recordsRDD = HoodieTableMetadataUtil.convertFilesToBloomFilterRecords( engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams(), createInstantTime); partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, recordsRDD); } - if (enabledPartitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) { + if (partitionTypes.contains(MetadataPartitionType.COLUMN_STATS) && totalDataFilesCount > 0) { final HoodieData recordsRDD = HoodieTableMetadataUtil.convertFilesToColumnStatsRecords( engineContext, Collections.emptyMap(), partitionToFilesMap, getRecordsGenerationParams()); partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, recordsRDD); } - LOG.info("Committing " + partitions.size() + " partitions and " + totalFiles + " files to metadata"); + LOG.info("Committing " + partitions.size() + " partitions and " + totalDataFilesCount + " files to metadata"); + commit(createInstantTime, partitionToRecordsMap, false); } + private HoodieData getFilesPartitionRecords(String createInstantTime, List partitionInfoList, HoodieRecord allPartitionRecord) { + HoodieData filesPartitionRecords = engineContext.parallelize(Arrays.asList(allPartitionRecord), 1); + if (partitionInfoList.isEmpty()) { + return filesPartitionRecords; + } + + HoodieData fileListRecords = engineContext.parallelize(partitionInfoList, partitionInfoList.size()).map(partitionInfo -> { + Map fileNameToSizeMap = partitionInfo.getFileNameToSizeMap(); + // filter for files that are part of the completed commits + Map validFileNameToSizeMap = 
fileNameToSizeMap.entrySet().stream().filter(fileSizePair -> { + String commitTime = FSUtils.getCommitTime(fileSizePair.getKey()); + return HoodieTimeline.compareTimestamps(commitTime, HoodieTimeline.LESSER_THAN_OR_EQUALS, createInstantTime); + }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + // Record which saves files within a partition + return HoodieMetadataPayload.createPartitionFilesRecord( + HoodieTableMetadataUtil.getPartitionIdentifier(partitionInfo.getRelativePath()), Option.of(validFileNameToSizeMap), Option.empty()); + }); + + return filesPartitionRecords.union(fileListRecords); + } + /** * A class which represents a directory and the files and directories inside it. *

@@ -937,7 +1100,7 @@ public DirectoryInfo(String relativePath, FileStatus[] fileStatus) { if (!status.getPath().getName().equals(HoodieTableMetaClient.METAFOLDER_NAME)) { this.subDirectories.add(status.getPath()); } - } else if (status.getPath().getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) { + } else if (status.getPath().getName().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX)) { // Presence of partition meta file implies this is a HUDI partition this.isHoodiePartition = true; } else if (FSUtils.isDataFile(status.getPath())) { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataWriter.java index 4f5ac027c91eb..83fe186727b32 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataWriter.java @@ -19,45 +19,87 @@ package org.apache.hudi.metadata; import org.apache.hudi.avro.model.HoodieCleanMetadata; +import org.apache.hudi.avro.model.HoodieIndexPartitionInfo; import org.apache.hudi.avro.model.HoodieRestoreMetadata; import org.apache.hudi.avro.model.HoodieRollbackMetadata; +import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import java.io.IOException; import java.io.Serializable; +import java.util.List; /** * Interface that supports updating metadata for a given table, as actions complete. */ public interface HoodieTableMetadataWriter extends Serializable, AutoCloseable { + /** + * Builds the given metadata partitions to create index. 
+ * + * @param engineContext + * @param indexPartitionInfos - information about partitions to build such as partition type and base instant time + */ + void buildMetadataPartitions(HoodieEngineContext engineContext, List indexPartitionInfos); + + /** + * Initialize file groups for the given metadata partitions when indexing is requested. + * + * @param dataMetaClient - meta client for the data table + * @param metadataPartitions - metadata partitions for which file groups needs to be initialized + * @param instantTime - instant time of the index action + * @throws IOException + */ + void initializeMetadataPartitions(HoodieTableMetaClient dataMetaClient, List metadataPartitions, String instantTime) throws IOException; + + /** + * Drop the given metadata partitions. + * + * @param metadataPartitions + * @throws IOException + */ + void dropMetadataPartitions(List metadataPartitions) throws IOException; + /** * Update the metadata table due to a COMMIT operation. - * @param commitMetadata commit metadata of the operation of interest. - * @param instantTime instant time of the commit. + * + * @param commitMetadata commit metadata of the operation of interest. + * @param instantTime instant time of the commit. * @param isTableServiceAction true if caller is a table service. false otherwise. Only regular write operations can trigger metadata table services and this argument - * will assist in this. + * will assist in this. */ void update(HoodieCommitMetadata commitMetadata, String instantTime, boolean isTableServiceAction); /** * Update the metadata table due to a CLEAN operation. + * * @param cleanMetadata clean metadata of the operation of interest. - * @param instantTime instant time of the commit. + * @param instantTime instant time of the commit. */ void update(HoodieCleanMetadata cleanMetadata, String instantTime); /** * Update the metadata table due to a RESTORE operation. + * * @param restoreMetadata restore metadata of the operation of interest. 
- * @param instantTime instant time of the commit. + * @param instantTime instant time of the commit. */ void update(HoodieRestoreMetadata restoreMetadata, String instantTime); /** * Update the metadata table due to a ROLLBACK operation. + * * @param rollbackMetadata rollback metadata of the operation of interest. - * @param instantTime instant time of the commit. + * @param instantTime instant time of the commit. */ void update(HoodieRollbackMetadata rollbackMetadata, String instantTime); + /** + * Deletes the given metadata partitions. This path reuses DELETE_PARTITION operation. + * + * @param instantTime - instant time when replacecommit corresponding to the drop will be recorded in the metadata timeline + * @param partitions - list of {@link MetadataPartitionType} to drop + */ + void deletePartitions(String instantTime, List partitions); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java index 62a4f089a45b5..f6f73f633ef5d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java @@ -18,11 +18,18 @@ package org.apache.hudi.table; +import org.apache.avro.Schema; +import org.apache.avro.specific.SpecificRecordBase; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; +import org.apache.hudi.avro.model.HoodieIndexCommitMetadata; +import org.apache.hudi.avro.model.HoodieIndexPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; import 
org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; @@ -43,6 +50,7 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; @@ -62,27 +70,21 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieInsertException; +import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; +import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata; import org.apache.hudi.table.marker.WriteMarkers; import org.apache.hudi.table.marker.WriteMarkersFactory; import org.apache.hudi.table.storage.HoodieLayoutFactory; import org.apache.hudi.table.storage.HoodieStorageLayout; - -import org.apache.avro.Schema; -import org.apache.avro.specific.SpecificRecordBase; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import javax.annotation.Nonnull; - import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; @@ -94,6 +96,13 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS; +import static 
org.apache.hudi.common.util.StringUtils.EMPTY_STRING; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataTable; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists; + /** * Abstract implementation of a HoodieTable. * @@ -261,19 +270,6 @@ public abstract HoodieWriteMetadata bulkInsertPrepped(HoodieEngineContext con */ public abstract HoodieWriteMetadata insertOverwriteTable(HoodieEngineContext context, String instantTime, I records); - /** - * Updates Metadata Indexes (like Column Stats index) - * TODO rebase onto metadata table (post RFC-27) - * - * @param context instance of {@link HoodieEngineContext} - * @param instantTime instant of the carried operation triggering the update - */ - public abstract void updateMetadataIndexes( - @Nonnull HoodieEngineContext context, - @Nonnull List stats, - @Nonnull String instantTime - ) throws Exception; - public HoodieWriteConfig getConfig() { return config; } @@ -447,7 +443,6 @@ public abstract Option scheduleClustering(HoodieEngineCont */ public abstract void rollbackBootstrap(HoodieEngineContext context, String instantTime); - /** * Schedule cleaning for the instant time. * @@ -497,6 +492,25 @@ public abstract HoodieRollbackMetadata rollback(HoodieEngineContext context, boolean deleteInstants, boolean skipLocking); + /** + * Schedules Indexing for the table to the given instant. + * + * @param context HoodieEngineContext + * @param indexInstantTime Instant time for scheduling index action. + * @param partitionsToIndex List of {@link MetadataPartitionType} that should be indexed. + * @return HoodieIndexPlan containing metadata partitions and instant upto which they should be indexed. 
+ */ + public abstract Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex); + + /** + * Execute requested index action. + * + * @param context HoodieEngineContext + * @param indexInstantTime Instant time for which index action was scheduled. + * @return HoodieIndexCommitMetadata containing write stats for each metadata partition. + */ + public abstract Option index(HoodieEngineContext context, String indexInstantTime); + /** * Create a savepoint at the specified instant, so that the table can be restored * to this point-in-timeline later if needed. @@ -746,6 +760,10 @@ public HoodieFileFormat getLogFileFormat() { return metaClient.getTableConfig().getLogFileFormat(); } + public Option getPartitionMetafileFormat() { + return metaClient.getTableConfig().getPartitionMetafileFormat(); + } + public String getBaseFileExtension() { return getBaseFileFormat().getFileExtension(); } @@ -764,7 +782,7 @@ public HoodieEngineContext getContext() { * Get Table metadata writer. 
* * @param triggeringInstantTimestamp - The instant that is triggering this metadata write - * @return instance of {@link HoodieTableMetadataWriter + * @return instance of {@link HoodieTableMetadataWriter} */ public final Option getMetadataWriter(String triggeringInstantTimestamp) { return getMetadataWriter(triggeringInstantTimestamp, Option.empty()); @@ -797,6 +815,96 @@ public Option getMetad return Option.empty(); } + /** + * Deletes the metadata table if the writer disables metadata table with hoodie.metadata.enable=false + */ + public void maybeDeleteMetadataTable() { + if (shouldExecuteMetadataTableDeletion()) { + try { + LOG.info("Deleting metadata table because it is disabled in writer."); + deleteMetadataTable(config.getBasePath(), context); + clearMetadataTablePartitionsConfig(Option.empty(), true); + } catch (HoodieMetadataException e) { + throw new HoodieException("Failed to delete metadata table.", e); + } + } + } + + /** + * Deletes the metadata partition if the writer disables any metadata index. + */ + public void deleteMetadataIndexIfNecessary() { + Stream.of(MetadataPartitionType.values()).forEach(partitionType -> { + if (shouldDeleteMetadataPartition(partitionType)) { + try { + LOG.info("Deleting metadata partition because it is disabled in writer: " + partitionType.name()); + if (metadataPartitionExists(metaClient.getBasePath(), context, partitionType)) { + deleteMetadataPartition(metaClient.getBasePath(), context, partitionType); + } + clearMetadataTablePartitionsConfig(Option.of(partitionType), false); + } catch (HoodieMetadataException e) { + throw new HoodieException("Failed to delete metadata partition: " + partitionType.name(), e); + } + } + }); + } + + private boolean shouldDeleteMetadataPartition(MetadataPartitionType partitionType) { + // Only delete metadata table partition when all the following conditions are met: + // (1) This is data table. + // (2) Index corresponding to this metadata partition is disabled in HoodieWriteConfig. 
+ // (3) The completed metadata partitions in table config contains this partition. + // NOTE: Inflight metadata partitions are not considered as they could have been inflight due to async indexer. + if (HoodieTableMetadata.isMetadataTable(metaClient.getBasePath()) || !config.isMetadataTableEnabled()) { + return false; + } + boolean metadataIndexDisabled; + switch (partitionType) { + // NOTE: FILES partition type is always considered in sync with hoodie.metadata.enable. + // It cannot be the case that metadata is enabled but FILES is disabled. + case COLUMN_STATS: + metadataIndexDisabled = !config.isMetadataColumnStatsIndexEnabled(); + break; + case BLOOM_FILTERS: + metadataIndexDisabled = !config.isMetadataBloomFilterIndexEnabled(); + break; + default: + LOG.debug("Not a valid metadata partition type: " + partitionType.name()); + return false; + } + return metadataIndexDisabled + && getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(partitionType.getPartitionPath()); + } + + private boolean shouldExecuteMetadataTableDeletion() { + // Only execute metadata table deletion when all the following conditions are met + // (1) This is data table + // (2) Metadata table is disabled in HoodieWriteConfig for the writer + // (3) Check `HoodieTableConfig.TABLE_METADATA_PARTITIONS`. 
Either the table config + // does not exist, or the table config is non-empty indicating that metadata table + // partitions are ready to use + return !HoodieTableMetadata.isMetadataTable(metaClient.getBasePath()) + && !config.isMetadataTableEnabled() + && (!metaClient.getTableConfig().contains(TABLE_METADATA_PARTITIONS) + || !metaClient.getTableConfig().getMetadataPartitions().isEmpty()); + } + + /** + * Clears hoodie.table.metadata.partitions in hoodie.properties + */ + private void clearMetadataTablePartitionsConfig(Option partitionType, boolean clearAll) { + if (clearAll) { + LOG.info("Clear hoodie.table.metadata.partitions in hoodie.properties"); + metaClient.getTableConfig().setValue(TABLE_METADATA_PARTITIONS.key(), EMPTY_STRING); + HoodieTableConfig.update(metaClient.getFs(), new Path(metaClient.getMetaPath()), metaClient.getTableConfig().getProps()); + return; + } + Set completedPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig()); + completedPartitions.remove(partitionType.get().getPartitionPath()); + metaClient.getTableConfig().setValue(HoodieTableConfig.TABLE_METADATA_PARTITIONS.key(), String.join(",", completedPartitions)); + HoodieTableConfig.update(metaClient.getFs(), new Path(metaClient.getMetaPath()), metaClient.getTableConfig().getProps()); + } + public HoodieTableMetadata getMetadataTable() { return this.metadata; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java index 4ae8009c9a88e..2bb277b05b4f8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java @@ -39,6 +39,7 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import 
org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.BaseActionExecutor; @@ -72,11 +73,12 @@ public CleanActionExecutor(HoodieEngineContext context, HoodieWriteConfig config this.skipLocking = skipLocking; } - static Boolean deleteFileAndGetResult(FileSystem fs, String deletePathStr) throws IOException { + private static Boolean deleteFileAndGetResult(FileSystem fs, String deletePathStr) throws IOException { Path deletePath = new Path(deletePathStr); LOG.debug("Working on delete path :" + deletePath); try { - boolean deleteResult = fs.delete(deletePath, false); + boolean isDirectory = fs.isDirectory(deletePath); + boolean deleteResult = fs.delete(deletePath, isDirectory); if (deleteResult) { LOG.debug("Cleaned file at path :" + deletePath); } @@ -87,7 +89,7 @@ static Boolean deleteFileAndGetResult(FileSystem fs, String deletePathStr) throw } } - static Stream> deleteFilesFunc(Iterator> cleanFileInfo, HoodieTable table) { + private static Stream> deleteFilesFunc(Iterator> cleanFileInfo, HoodieTable table) { Map partitionCleanStatMap = new HashMap<>(); FileSystem fs = table.getMetaClient().getFs(); @@ -144,6 +146,15 @@ List clean(HoodieEngineContext context, HoodieCleanerPlan clean Map partitionCleanStatsMap = partitionCleanStats .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + List partitionsToBeDeleted = cleanerPlan.getPartitionsToBeDeleted() != null ? cleanerPlan.getPartitionsToBeDeleted() : new ArrayList<>(); + partitionsToBeDeleted.forEach(entry -> { + try { + deleteFileAndGetResult(table.getMetaClient().getFs(), table.getMetaClient().getBasePath() + "/" + entry); + } catch (IOException e) { + LOG.warn("Partition deletion failed " + entry); + } + }); + // Return PartitionCleanStat for each partition passed. 
return cleanerPlan.getFilePathsToBeDeletedPerPartition().keySet().stream().map(partitionPath -> { PartitionCleanStat partitionCleanStat = partitionCleanStatsMap.containsKey(partitionPath) @@ -162,6 +173,7 @@ List clean(HoodieEngineContext context, HoodieCleanerPlan clean .withDeleteBootstrapBasePathPatterns(partitionCleanStat.getDeleteBootstrapBasePathPatterns()) .withSuccessfulDeleteBootstrapBaseFiles(partitionCleanStat.getSuccessfulDeleteBootstrapBaseFiles()) .withFailedDeleteBootstrapBaseFiles(partitionCleanStat.getFailedDeleteBootstrapBaseFiles()) + .isPartitionDeleted(partitionsToBeDeleted.contains(partitionPath)) .build(); }).collect(Collectors.toList()); } @@ -229,6 +241,14 @@ public HoodieCleanMetadata execute() { List pendingCleanInstants = table.getCleanTimeline() .filterInflightsAndRequested().getInstants().collect(Collectors.toList()); if (pendingCleanInstants.size() > 0) { + // try to clean old history schema. + try { + FileBasedInternalSchemaStorageManager fss = new FileBasedInternalSchemaStorageManager(table.getMetaClient()); + fss.cleanOldFiles(pendingCleanInstants.stream().map(is -> is.getTimestamp()).collect(Collectors.toList())); + } catch (Exception e) { + // we should not affect original clean logic. Swallow exception and log warn. 
+ LOG.warn("failed to clean old history schema"); + } pendingCleanInstants.forEach(hoodieInstant -> { if (table.getCleanTimeline().isEmpty(hoodieInstant)) { table.getActiveTimeline().deleteEmptyInstantIfExists(hoodieInstant); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java index 86f65cae5ee7b..fb2df582bfe15 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java @@ -22,6 +22,7 @@ import org.apache.hudi.avro.model.HoodieCleanFileInfo; import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.CleanFileInfo; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -108,15 +109,22 @@ HoodieCleanerPlan requestClean(HoodieEngineContext context) { context.setJobStatus(this.getClass().getSimpleName(), "Generating list of file slices to be cleaned"); - Map> cleanOps = context + Map>> cleanOpsWithPartitionMeta = context .map(partitionsToClean, partitionPathToClean -> Pair.of(partitionPathToClean, planner.getDeletePaths(partitionPathToClean)), cleanerParallelism) .stream() - .collect(Collectors.toMap(Pair::getKey, y -> CleanerUtils.convertToHoodieCleanFileInfoList(y.getValue()))); + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + + Map> cleanOps = cleanOpsWithPartitionMeta.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, + e -> CleanerUtils.convertToHoodieCleanFileInfoList(e.getValue().getValue()))); + + List partitionsToDelete = 
cleanOpsWithPartitionMeta.entrySet().stream().filter(entry -> entry.getValue().getKey()).map(Map.Entry::getKey) + .collect(Collectors.toList()); return new HoodieCleanerPlan(earliestInstant .map(x -> new HoodieActionInstant(x.getTimestamp(), x.getAction(), x.getState().name())).orElse(null), config.getCleanerPolicy().name(), CollectionUtils.createImmutableMap(), - CleanPlanner.LATEST_CLEAN_PLAN_VERSION, cleanOps); + CleanPlanner.LATEST_CLEAN_PLAN_VERSION, cleanOps, partitionsToDelete); } catch (IOException e) { throw new HoodieIOException("Failed to schedule clean operation", e); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java index 7e56d3456a0a4..79eef43b3c00a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java @@ -21,7 +21,6 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieSavepointMetadata; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.CleanFileInfo; import org.apache.hudi.common.model.CompactionOperation; import org.apache.hudi.common.model.FileSlice; @@ -45,7 +44,9 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieSavepointException; +import org.apache.hudi.metadata.FileSystemBackedTableMetadata; import org.apache.hudi.table.HoodieTable; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -202,11 +203,18 @@ private List getPartitionPathsForIncrementalCleaning(HoodieCleanMetadata /** * Scan and list all partitions for cleaning. * @return all partitions paths for the dataset. 
- * @throws IOException */ private List getPartitionPathsForFullCleaning() { // Go to brute force mode of scanning all partitions - return FSUtils.getAllPartitionPaths(context, config.getMetadataConfig(), config.getBasePath()); + try { + // Because the partition of BaseTableMetadata has been deleted, + // all partition information can only be obtained from FileSystemBackedTableMetadata. + FileSystemBackedTableMetadata fsBackedTableMetadata = new FileSystemBackedTableMetadata(context, + context.getHadoopConf(), config.getBasePath(), config.shouldAssumeDatePartitioning()); + return fsBackedTableMetadata.getAllPartitionPaths(); + } catch (IOException e) { + return Collections.emptyList(); + } } /** @@ -214,7 +222,7 @@ private List getPartitionPathsForFullCleaning() { * policy is useful, if you are simply interested in querying the table, and you don't want too many versions for a * single file (i.e run it with versionsRetained = 1) */ - private List getFilesToCleanKeepingLatestVersions(String partitionPath) { + private Pair> getFilesToCleanKeepingLatestVersions(String partitionPath) { LOG.info("Cleaning " + partitionPath + ", retaining latest " + config.getCleanerFileVersionsRetained() + " file versions. "); List deletePaths = new ArrayList<>(); @@ -226,7 +234,7 @@ private List getFilesToCleanKeepingLatestVersions(String partitio // In this scenario, we will assume that once replaced a file group automatically becomes eligible for cleaning completely // In other words, the file versions only apply to the active file groups. 
deletePaths.addAll(getReplacedFilesEligibleToClean(savepointedFiles, partitionPath, Option.empty())); - + boolean toDeletePartition = false; List fileGroups = fileSystemView.getAllFileGroups(partitionPath).collect(Collectors.toList()); for (HoodieFileGroup fileGroup : fileGroups) { int keepVersions = config.getCleanerFileVersionsRetained(); @@ -254,10 +262,14 @@ private List getFilesToCleanKeepingLatestVersions(String partitio deletePaths.addAll(getCleanFileInfoForSlice(nextSlice)); } } - return deletePaths; + // if there are no valid file groups for the partition, mark it to be deleted + if (fileGroups.isEmpty()) { + toDeletePartition = true; + } + return Pair.of(toDeletePartition, deletePaths); } - private List getFilesToCleanKeepingLatestCommits(String partitionPath) { + private Pair> getFilesToCleanKeepingLatestCommits(String partitionPath) { return getFilesToCleanKeepingLatestCommits(partitionPath, config.getCleanerCommitsRetained(), HoodieCleaningPolicy.KEEP_LATEST_COMMITS); } @@ -274,8 +286,11 @@ private List getFilesToCleanKeepingLatestCommits(String partition * retain 10 commits, and commit batch time is 30 mins, then you have 5 hrs of lookback) *

* This policy is the default. + * + * @return A {@link Pair} whose left is boolean indicating whether partition itself needs to be deleted, + * and right is a list of {@link CleanFileInfo} about the files in the partition that needs to be deleted. */ - private List getFilesToCleanKeepingLatestCommits(String partitionPath, int commitsRetained, HoodieCleaningPolicy policy) { + private Pair> getFilesToCleanKeepingLatestCommits(String partitionPath, int commitsRetained, HoodieCleaningPolicy policy) { LOG.info("Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. "); List deletePaths = new ArrayList<>(); @@ -285,6 +300,7 @@ private List getFilesToCleanKeepingLatestCommits(String partition .collect(Collectors.toList()); // determine if we have enough commits, to start cleaning. + boolean toDeletePartition = false; if (commitTimeline.countInstants() > commitsRetained) { Option earliestCommitToRetainOption = getEarliestCommitToRetain(); HoodieInstant earliestCommitToRetain = earliestCommitToRetainOption.get(); @@ -350,8 +366,12 @@ private List getFilesToCleanKeepingLatestCommits(String partition } } } + // if there are no valid file groups for the partition, mark it to be deleted + if (fileGroups.isEmpty()) { + toDeletePartition = true; + } } - return deletePaths; + return Pair.of(toDeletePartition, deletePaths); } /** @@ -362,10 +382,10 @@ private List getFilesToCleanKeepingLatestCommits(String partition * @param partitionPath partition path to check * @return list of files to clean */ - private List getFilesToCleanKeepingLatestHours(String partitionPath) { + private Pair> getFilesToCleanKeepingLatestHours(String partitionPath) { return getFilesToCleanKeepingLatestCommits(partitionPath, 0, HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS); } - + private List getReplacedFilesEligibleToClean(List savepointedFiles, String partitionPath, Option earliestCommitToRetain) { final Stream replacedGroups; if (earliestCommitToRetain.isPresent()) { @@ -416,9 
+436,9 @@ private List getCleanFileInfoForSlice(FileSlice nextSlice) { /** * Returns files to be cleaned for the given partitionPath based on cleaning policy. */ - public List getDeletePaths(String partitionPath) { + public Pair> getDeletePaths(String partitionPath) { HoodieCleaningPolicy policy = config.getCleanerPolicy(); - List deletePaths; + Pair> deletePaths; if (policy == HoodieCleaningPolicy.KEEP_LATEST_COMMITS) { deletePaths = getFilesToCleanKeepingLatestCommits(partitionPath); } else if (policy == HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) { @@ -428,8 +448,10 @@ public List getDeletePaths(String partitionPath) { } else { throw new IllegalArgumentException("Unknown cleaning policy : " + policy.name()); } - LOG.info(deletePaths.size() + " patterns used to delete in partition path:" + partitionPath); - + LOG.info(deletePaths.getValue().size() + " patterns used to delete in partition path:" + partitionPath); + if (deletePaths.getKey()) { + LOG.info("Partition " + partitionPath + " to be deleted"); + } return deletePaths; } @@ -455,7 +477,7 @@ public Option getEarliestCommitToRetain() { /** * Determine if file slice needed to be preserved for pending compaction. - * + * * @param fileSlice File Slice * @return true if file slice needs to be preserved, false otherwise. 
*/ diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java index b8d5948c1f453..fb07d35928d7c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java @@ -80,6 +80,7 @@ public abstract class BaseCommitActionExecutor>> lastCompletedTxn; + protected Set pendingInflightAndRequestedInstants; public BaseCommitActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, WriteOperationType operationType, @@ -91,6 +92,8 @@ public BaseCommitActionExecutor(HoodieEngineContext context, HoodieWriteConfig c // TODO : Remove this once we refactor and move out autoCommit method from here, since the TxnManager is held in {@link BaseHoodieWriteClient}. this.txnManager = new TransactionManager(config, table.getMetaClient().getFs()); this.lastCompletedTxn = TransactionUtils.getLastCompletedTxnInstantAndMetadata(table.getMetaClient()); + this.pendingInflightAndRequestedInstants = TransactionUtils.getInflightAndRequestedInstants(table.getMetaClient()); + this.pendingInflightAndRequestedInstants.remove(instantTime); if (table.getStorageLayout().doesNotSupport(operationType)) { throw new UnsupportedOperationException("Executor " + this.getClass().getSimpleName() + " is not compatible with table layout " + table.getStorageLayout().getClass().getSimpleName()); @@ -184,7 +187,7 @@ protected void autoCommit(Option> extraMetadata, HoodieWrite setCommitMetadata(result); // reload active timeline so as to get all updates after current transaction have started. hence setting last arg to true. 
TransactionUtils.resolveWriteConflictIfAny(table, this.txnManager.getCurrentTransactionOwner(), - result.getCommitMetadata(), config, this.txnManager.getLastCompletedTransactionOwner(), true); + result.getCommitMetadata(), config, this.txnManager.getLastCompletedTransactionOwner(), true, pendingInflightAndRequestedInstants); commit(extraMetadata, result); } finally { this.txnManager.endTransaction(inflightInstant); @@ -233,6 +236,9 @@ protected HoodieWriteMetadata> executeClustering(HoodieC table.getActiveTimeline().transitionReplaceRequestedToInflight(instant, Option.empty()); table.getMetaClient().reloadActiveTimeline(); + // Disable auto commit. Strategy is only expected to write data in new files. + config.setValue(HoodieWriteConfig.AUTO_COMMIT_ENABLE, Boolean.FALSE.toString()); + final Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema())); HoodieWriteMetadata> writeMetadata = ( (ClusteringExecutionStrategy>, HoodieData, HoodieData>) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java index 2b4a5d1608eec..578cdf0bc7f14 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java @@ -18,14 +18,24 @@ package org.apache.hudi.table.action.commit; +import org.apache.avro.SchemaCompatibility; +import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.data.HoodieData; +import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; +import 
org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.InternalSchemaCache; import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.action.InternalSchemaMerger; +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter; +import org.apache.hudi.internal.schema.utils.AvroSchemaEvolutionUtils; +import org.apache.hudi.internal.schema.utils.SerDeHelper; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; @@ -41,6 +51,8 @@ import java.io.IOException; import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; public class HoodieMergeHelper extends BaseMergeHelper>, HoodieData, HoodieData> { @@ -78,12 +90,41 @@ public void runMerge(HoodieTable>, HoodieData wrapper = null; HoodieFileReader reader = HoodieFileReaderFactory.getFileReader(cfgForHoodieFile, mergeHandle.getOldFilePath()); + + Option querySchemaOpt = SerDeHelper.fromJson(table.getConfig().getInternalSchema()); + boolean needToReWriteRecord = false; + // TODO support bootstrap + if (querySchemaOpt.isPresent() && !baseFile.getBootstrapBaseFile().isPresent()) { + // check implicitly add columns, and position reorder(spark sql may change cols order) + InternalSchema querySchema = AvroSchemaEvolutionUtils.evolveSchemaFromNewAvroSchema(readSchema, querySchemaOpt.get(), true); + long commitInstantTime = Long.valueOf(FSUtils.getCommitTime(mergeHandle.getOldFilePath().getName())); + InternalSchema writeInternalSchema = InternalSchemaCache.searchSchemaAndCache(commitInstantTime, table.getMetaClient(), table.getConfig().getInternalSchemaCacheEnable()); + if (writeInternalSchema.isEmptySchema()) { + throw new HoodieException(String.format("cannot find file schema for current commit %s", commitInstantTime)); 
+ } + List colNamesFromQuerySchema = querySchema.getAllColsFullName(); + List colNamesFromWriteSchema = writeInternalSchema.getAllColsFullName(); + List sameCols = colNamesFromWriteSchema.stream() + .filter(f -> colNamesFromQuerySchema.contains(f) + && writeInternalSchema.findIdByName(f) == querySchema.findIdByName(f) + && writeInternalSchema.findIdByName(f) != -1 + && writeInternalSchema.findType(writeInternalSchema.findIdByName(f)).equals(querySchema.findType(writeInternalSchema.findIdByName(f)))).collect(Collectors.toList()); + readSchema = AvroInternalSchemaConverter.convert(new InternalSchemaMerger(writeInternalSchema, querySchema, true, false).mergeSchema(), readSchema.getName()); + Schema writeSchemaFromFile = AvroInternalSchemaConverter.convert(writeInternalSchema, readSchema.getName()); + needToReWriteRecord = sameCols.size() != colNamesFromWriteSchema.size() + || SchemaCompatibility.checkReaderWriterCompatibility(writeSchemaFromFile, readSchema).getType() == org.apache.avro.SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE; + } + try { final Iterator readerIterator; if (baseFile.getBootstrapBaseFile().isPresent()) { readerIterator = getMergingIterator(table, mergeHandle, baseFile, reader, readSchema, externalSchemaTransformation); } else { - readerIterator = reader.getRecordIterator(readSchema); + if (needToReWriteRecord) { + readerIterator = HoodieAvroUtils.rewriteRecordWithNewSchema(reader.getRecordIterator(), readSchema); + } else { + readerIterator = reader.getRecordIterator(readSchema); + } } ThreadLocal encoderCache = new ThreadLocal<>(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java index e238d40683b64..d548e07eac8a5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java +++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java @@ -44,9 +44,12 @@ import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.CompactionUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.utils.SerDeHelper; import org.apache.hudi.io.IOUtils; import org.apache.hudi.table.HoodieCompactionHandler; import org.apache.hudi.table.HoodieTable; @@ -117,8 +120,10 @@ public HoodieData compact( // log file.That is because in the case of MergeInto, the config.getSchema may not // the same with the table schema. try { - Schema readerSchema = schemaResolver.getTableAvroSchema(false); - config.setSchema(readerSchema.toString()); + if (StringUtils.isNullOrEmpty(config.getInternalSchema())) { + Schema readerSchema = schemaResolver.getTableAvroSchema(false); + config.setSchema(readerSchema.toString()); + } } catch (Exception e) { // If there is no commit in the table, just ignore the exception. } @@ -145,9 +150,17 @@ public List compact(HoodieCompactionHandler compactionHandler, String instantTime, TaskContextSupplier taskContextSupplier) throws IOException { FileSystem fs = metaClient.getFs(); - - Schema readerSchema = HoodieAvroUtils.addMetadataFields( - new Schema.Parser().parse(config.getSchema()), config.allowOperationMetadataField()); + Schema readerSchema; + Option internalSchemaOption = Option.empty(); + if (!StringUtils.isNullOrEmpty(config.getInternalSchema())) { + readerSchema = new Schema.Parser().parse(config.getSchema()); + internalSchemaOption = SerDeHelper.fromJson(config.getInternalSchema()); + // its safe to modify config here, since we running in task side. 
+ ((HoodieTable) compactionHandler).getConfig().setDefault(config); + } else { + readerSchema = HoodieAvroUtils.addMetadataFields( + new Schema.Parser().parse(config.getSchema()), config.allowOperationMetadataField()); + } LOG.info("Compacting base " + operation.getDataFileName() + " with delta files " + operation.getDeltaFileNames() + " for commit " + instantTime); // TODO - FIX THIS @@ -172,6 +185,7 @@ public List compact(HoodieCompactionHandler compactionHandler, .withLogFilePaths(logFiles) .withReaderSchema(readerSchema) .withLatestInstantTime(maxInstantTime) + .withInternalSchema(internalSchemaOption.orElse(InternalSchema.getEmptyInternalSchema())) .withMaxMemorySizeInBytes(maxMemoryPerCompaction) .withReadBlocksLazily(config.getCompactionLazyBlockReadEnabled()) .withReverseReader(config.getCompactionReverseLogReadEnabled()) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/RunCompactionActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/RunCompactionActionExecutor.java index 5e3005b22fb23..24c0dbc80ed80 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/RunCompactionActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/RunCompactionActionExecutor.java @@ -30,8 +30,11 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.CompactionUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.InternalSchemaCache; +import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieCompactionException; +import org.apache.hudi.internal.schema.utils.SerDeHelper; import org.apache.hudi.table.HoodieCompactionHandler; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.BaseActionExecutor; @@ -70,8 +73,19 @@ 
public HoodieWriteMetadata> execute() { HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(table.getMetaClient(), instantTime); + // try to load internalSchema to support schema Evolution + HoodieWriteConfig configCopy = config; + Pair, Option> schemaPair = InternalSchemaCache + .getInternalSchemaAndAvroSchemaForClusteringAndCompaction(table.getMetaClient(), instantTime); + if (schemaPair.getLeft().isPresent() && schemaPair.getRight().isPresent()) { + // should not influence the original config, just copy it + configCopy = HoodieWriteConfig.newBuilder().withProperties(config.getProps()).build(); + configCopy.setInternalSchemaString(schemaPair.getLeft().get()); + configCopy.setSchema(schemaPair.getRight().get()); + } + HoodieData statuses = compactor.compact( - context, compactionPlan, table, config, instantTime, compactionHandler); + context, compactionPlan, table, configCopy, instantTime, compactionHandler); compactor.maybePersist(statuses, config); context.setJobStatus(this.getClass().getSimpleName(), "Preparing compaction metadata"); @@ -81,7 +95,10 @@ public HoodieWriteMetadata> execute() { metadata.addWriteStat(stat.getPartitionPath(), stat); } metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, config.getSchema()); - + if (schemaPair.getLeft().isPresent()) { + metadata.addMetadata(SerDeHelper.LATEST_SCHEMA, schemaPair.getLeft().get()); + metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, schemaPair.getRight().get()); + } compactionMetadata.setWriteStatuses(statuses); compactionMetadata.setCommitted(false); compactionMetadata.setCommitMetadata(Option.of(metadata)); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java new file mode 100644 index 0000000000000..8c86a298f8a4b --- /dev/null +++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java @@ -0,0 +1,390 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.table.action.index; + +import org.apache.hudi.avro.model.HoodieCleanMetadata; +import org.apache.hudi.avro.model.HoodieIndexCommitMetadata; +import org.apache.hudi.avro.model.HoodieIndexPartitionInfo; +import org.apache.hudi.avro.model.HoodieIndexPlan; +import org.apache.hudi.avro.model.HoodieRestoreMetadata; +import org.apache.hudi.avro.model.HoodieRollbackMetadata; +import org.apache.hudi.client.transaction.TransactionManager; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; +import org.apache.hudi.common.util.CleanerUtils; +import org.apache.hudi.common.util.CollectionUtils; 
+import org.apache.hudi.common.util.HoodieTimer; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieIndexException; +import org.apache.hudi.metadata.HoodieTableMetadataWriter; +import org.apache.hudi.metadata.MetadataPartitionType; +import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.table.action.BaseActionExecutor; + +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.model.WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL; +import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS; +import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS_INFLIGHT; +import static org.apache.hudi.common.table.timeline.HoodieInstant.State.COMPLETED; +import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.CLEAN_ACTION; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.INDEXING_ACTION; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.RESTORE_ACTION; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTION; +import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE; +import static org.apache.hudi.metadata.HoodieTableMetadata.getMetadataTableBasePath; +import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightMetadataPartitions; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists; + +/** + * Reads the index plan and executes the plan. + * It also reconciles updates on data timeline while indexing was in progress. + */ +public class RunIndexActionExecutor extends BaseActionExecutor> { + + private static final Logger LOG = LogManager.getLogger(RunIndexActionExecutor.class); + private static final Integer INDEX_COMMIT_METADATA_VERSION_1 = 1; + private static final Integer LATEST_INDEX_COMMIT_METADATA_VERSION = INDEX_COMMIT_METADATA_VERSION_1; + private static final int MAX_CONCURRENT_INDEXING = 1; + private static final int TIMELINE_RELOAD_INTERVAL_MILLIS = 5000; + + // we use this to update the latest instant in data timeline that has been indexed in metadata table + // this needs to be volatile as it can be updated in the IndexingCheckTask spawned by this executor + // assumption is that only one indexer can execute at a time + private volatile String currentCaughtupInstant; + + private final TransactionManager txnManager; + + public RunIndexActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime) { + super(context, config, table, instantTime); + this.txnManager = new TransactionManager(config, table.getMetaClient().getFs()); + } + + @Override + public Option execute() { + HoodieTimer indexTimer = new HoodieTimer(); + indexTimer.startTimer(); + + HoodieInstant indexInstant = validateAndGetIndexInstant(); + // read HoodieIndexPlan + HoodieIndexPlan indexPlan; + try { + indexPlan = 
TimelineMetadataUtils.deserializeIndexPlan(table.getActiveTimeline().readIndexPlanAsBytes(indexInstant).get()); + } catch (IOException e) { + throw new HoodieIndexException("Failed to read the index plan for instant: " + indexInstant); + } + List indexPartitionInfos = indexPlan.getIndexPartitionInfos(); + try { + if (indexPartitionInfos == null || indexPartitionInfos.isEmpty()) { + throw new HoodieIndexException(String.format("No partitions to index for instant: %s", instantTime)); + } + // ensure the metadata partitions for the requested indexes are not already available (or inflight) + Set indexesInflightOrCompleted = getInflightAndCompletedMetadataPartitions(table.getMetaClient().getTableConfig()); + Set requestedPartitions = indexPartitionInfos.stream() + .map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toSet()); + requestedPartitions.retainAll(indexesInflightOrCompleted); + if (!requestedPartitions.isEmpty()) { + throw new HoodieIndexException(String.format("Following partitions already exist or inflight: %s", requestedPartitions)); + } + + // transition requested indexInstant to inflight + table.getActiveTimeline().transitionIndexRequestedToInflight(indexInstant, Option.empty()); + // start indexing for each partition + HoodieTableMetadataWriter metadataWriter = table.getMetadataWriter(instantTime) + .orElseThrow(() -> new HoodieIndexException(String.format("Could not get metadata writer to run index action for instant: %s", instantTime))); + // this will only build index upto base instant as generated by the plan, we will be doing catchup later + String indexUptoInstant = indexPartitionInfos.get(0).getIndexUptoInstant(); + LOG.info("Starting Index Building with base instant: " + indexUptoInstant); + metadataWriter.buildMetadataPartitions(context, indexPartitionInfos); + + // get remaining instants to catchup + List instantsToCatchup = getInstantsToCatchup(indexUptoInstant); + LOG.info("Total remaining instants to index: " + 
instantsToCatchup.size()); + + // reconcile with metadata table timeline + String metadataBasePath = getMetadataTableBasePath(table.getMetaClient().getBasePath()); + HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataBasePath).build(); + Set metadataCompletedTimestamps = getCompletedArchivedAndActiveInstantsAfter(indexUptoInstant, metadataMetaClient).stream() + .map(HoodieInstant::getTimestamp).collect(Collectors.toSet()); + + // index catchup for all remaining instants with a timeout + currentCaughtupInstant = indexUptoInstant; + catchupWithInflightWriters(metadataWriter, instantsToCatchup, metadataMetaClient, metadataCompletedTimestamps); + // save index commit metadata and update table config + List finalIndexPartitionInfos = indexPartitionInfos.stream() + .map(info -> new HoodieIndexPartitionInfo( + info.getVersion(), + info.getMetadataPartitionPath(), + currentCaughtupInstant)) + .collect(Collectors.toList()); + HoodieIndexCommitMetadata indexCommitMetadata = HoodieIndexCommitMetadata.newBuilder() + .setVersion(LATEST_INDEX_COMMIT_METADATA_VERSION).setIndexPartitionInfos(finalIndexPartitionInfos).build(); + updateTableConfigAndTimeline(indexInstant, finalIndexPartitionInfos, indexCommitMetadata); + return Option.of(indexCommitMetadata); + } catch (IOException e) { + // abort gracefully + abort(indexInstant, indexPartitionInfos.stream().map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toSet())); + throw new HoodieIndexException(String.format("Unable to index instant: %s", indexInstant)); + } + } + + private void abort(HoodieInstant indexInstant, Set requestedPartitions) { + Set inflightPartitions = getInflightMetadataPartitions(table.getMetaClient().getTableConfig()); + Set completedPartitions = getCompletedMetadataPartitions(table.getMetaClient().getTableConfig()); + // update table config + requestedPartitions.forEach(partition -> { + 
inflightPartitions.remove(partition); + completedPartitions.remove(partition); + }); + table.getMetaClient().getTableConfig().setValue(TABLE_METADATA_PARTITIONS_INFLIGHT.key(), String.join(",", inflightPartitions)); + table.getMetaClient().getTableConfig().setValue(TABLE_METADATA_PARTITIONS.key(), String.join(",", completedPartitions)); + HoodieTableConfig.update(table.getMetaClient().getFs(), new Path(table.getMetaClient().getMetaPath()), table.getMetaClient().getTableConfig().getProps()); + + // delete metadata partition + requestedPartitions.forEach(partition -> { + MetadataPartitionType partitionType = MetadataPartitionType.valueOf(partition.toUpperCase(Locale.ROOT)); + if (metadataPartitionExists(table.getMetaClient().getBasePath(), context, partitionType)) { + deleteMetadataPartition(table.getMetaClient().getBasePath(), context, partitionType); + } + }); + + // delete inflight instant + table.getMetaClient().reloadActiveTimeline().deleteInstantFileIfExists(HoodieTimeline.getIndexInflightInstant(indexInstant.getTimestamp())); + } + + private List getInstantsToCatchup(String indexUptoInstant) { + // since only write timeline was considered while scheduling index, which gives us the indexUpto instant + // here we consider other valid actions to pick catchupStart instant + Set validActions = CollectionUtils.createSet(CLEAN_ACTION, RESTORE_ACTION, ROLLBACK_ACTION); + Option catchupStartInstant = table.getMetaClient().reloadActiveTimeline() + .getTimelineOfActions(validActions) + .filterInflightsAndRequested() + .findInstantsBefore(indexUptoInstant) + .firstInstant(); + // get all instants since the plan completed (both from active timeline and archived timeline) + List instantsToIndex; + if (catchupStartInstant.isPresent()) { + instantsToIndex = getRemainingArchivedAndActiveInstantsSince(catchupStartInstant.get().getTimestamp(), table.getMetaClient()); + } else { + instantsToIndex = getRemainingArchivedAndActiveInstantsSince(indexUptoInstant, 
table.getMetaClient()); + } + return instantsToIndex; + } + + private HoodieInstant validateAndGetIndexInstant() { + // ensure lock provider configured + if (!config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl() || StringUtils.isNullOrEmpty(config.getLockProviderClass())) { + throw new HoodieIndexException(String.format("Need to set %s as %s and configure lock provider class", + WRITE_CONCURRENCY_MODE.key(), OPTIMISTIC_CONCURRENCY_CONTROL.name())); + } + + return table.getActiveTimeline() + .filterPendingIndexTimeline() + .filter(instant -> instant.getTimestamp().equals(instantTime) && REQUESTED.equals(instant.getState())) + .lastInstant() + .orElseThrow(() -> new HoodieIndexException(String.format("No requested index instant found: %s", instantTime))); + } + + private void updateTableConfigAndTimeline(HoodieInstant indexInstant, + List finalIndexPartitionInfos, + HoodieIndexCommitMetadata indexCommitMetadata) throws IOException { + try { + // update the table config and timeline in a lock as there could be another indexer running + txnManager.beginTransaction(); + updateMetadataPartitionsTableConfig(table.getMetaClient(), + finalIndexPartitionInfos.stream().map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toSet())); + table.getActiveTimeline().saveAsComplete( + new HoodieInstant(true, INDEXING_ACTION, indexInstant.getTimestamp()), + TimelineMetadataUtils.serializeIndexCommitMetadata(indexCommitMetadata)); + } finally { + txnManager.endTransaction(); + } + } + + private void catchupWithInflightWriters(HoodieTableMetadataWriter metadataWriter, List instantsToIndex, + HoodieTableMetaClient metadataMetaClient, Set metadataCompletedTimestamps) { + ExecutorService executorService = Executors.newFixedThreadPool(MAX_CONCURRENT_INDEXING); + Future indexingCatchupTaskFuture = executorService.submit( + new IndexingCatchupTask(metadataWriter, instantsToIndex, metadataCompletedTimestamps, table.getMetaClient(), metadataMetaClient)); 
+ try { + LOG.info("Starting index catchup task"); + indexingCatchupTaskFuture.get(config.getIndexingCheckTimeoutSeconds(), TimeUnit.SECONDS); + } catch (Exception e) { + indexingCatchupTaskFuture.cancel(true); + throw new HoodieIndexException(String.format("Index catchup failed. Current indexed instant = %s. Aborting!", currentCaughtupInstant), e); + } finally { + executorService.shutdownNow(); + } + } + + private static List getRemainingArchivedAndActiveInstantsSince(String instant, HoodieTableMetaClient metaClient) { + List remainingInstantsToIndex = metaClient.getArchivedTimeline().getInstants() + .filter(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), GREATER_THAN_OR_EQUALS, instant)) + .filter(i -> !INDEXING_ACTION.equals(i.getAction())) + .collect(Collectors.toList()); + remainingInstantsToIndex.addAll(metaClient.getActiveTimeline().findInstantsAfter(instant).getInstants() + .filter(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), GREATER_THAN_OR_EQUALS, instant)) + .filter(i -> !INDEXING_ACTION.equals(i.getAction())) + .collect(Collectors.toList())); + return remainingInstantsToIndex; + } + + private static List getCompletedArchivedAndActiveInstantsAfter(String instant, HoodieTableMetaClient metaClient) { + List completedInstants = metaClient.getArchivedTimeline().filterCompletedInstants().findInstantsAfter(instant) + .getInstants().filter(i -> !INDEXING_ACTION.equals(i.getAction())).collect(Collectors.toList()); + completedInstants.addAll(metaClient.reloadActiveTimeline().filterCompletedInstants().findInstantsAfter(instant) + .getInstants().filter(i -> !INDEXING_ACTION.equals(i.getAction())).collect(Collectors.toList())); + return completedInstants; + } + + private void updateMetadataPartitionsTableConfig(HoodieTableMetaClient metaClient, Set metadataPartitions) { + // remove from inflight and update completed indexes + Set inflightPartitions = getInflightMetadataPartitions(metaClient.getTableConfig()); + Set completedPartitions = 
getCompletedMetadataPartitions(metaClient.getTableConfig()); + inflightPartitions.removeAll(metadataPartitions); + completedPartitions.addAll(metadataPartitions); + // update table config + metaClient.getTableConfig().setValue(TABLE_METADATA_PARTITIONS_INFLIGHT.key(), String.join(",", inflightPartitions)); + metaClient.getTableConfig().setValue(TABLE_METADATA_PARTITIONS.key(), String.join(",", completedPartitions)); + HoodieTableConfig.update(metaClient.getFs(), new Path(metaClient.getMetaPath()), metaClient.getTableConfig().getProps()); + } + + /** + * Indexing check runs for instants that completed after the base instant (in the index plan). + * It will check if these later instants have logged updates to metadata table or not. + * If not, then it will do the update. If a later instant is inflight, it will wait until it is completed or the task times out. + */ + class IndexingCatchupTask implements Runnable { + + private final HoodieTableMetadataWriter metadataWriter; + private final List instantsToIndex; + private final Set metadataCompletedInstants; + private final HoodieTableMetaClient metaClient; + private final HoodieTableMetaClient metadataMetaClient; + + IndexingCatchupTask(HoodieTableMetadataWriter metadataWriter, + List instantsToIndex, + Set metadataCompletedInstants, + HoodieTableMetaClient metaClient, + HoodieTableMetaClient metadataMetaClient) { + this.metadataWriter = metadataWriter; + this.instantsToIndex = instantsToIndex; + this.metadataCompletedInstants = metadataCompletedInstants; + this.metaClient = metaClient; + this.metadataMetaClient = metadataMetaClient; + } + + @Override + public void run() { + for (HoodieInstant instant : instantsToIndex) { + // metadata index already updated for this instant + if (!metadataCompletedInstants.isEmpty() && metadataCompletedInstants.contains(instant.getTimestamp())) { + currentCaughtupInstant = instant.getTimestamp(); + continue; + } + while (!instant.isCompleted()) { + try { + LOG.warn("instant not 
completed, reloading timeline " + instant); + // reload timeline and fetch instant details again wait until timeout + String instantTime = instant.getTimestamp(); + Option currentInstant = metaClient.reloadActiveTimeline() + .filterCompletedInstants().filter(i -> i.getTimestamp().equals(instantTime)).firstInstant(); + instant = currentInstant.orElse(instant); + // so that timeline is not reloaded very frequently + Thread.sleep(TIMELINE_RELOAD_INTERVAL_MILLIS); + } catch (InterruptedException e) { + throw new HoodieIndexException(String.format("Thread interrupted while running indexing check for instant: %s", instant), e); + } + } + // if instant completed, ensure that there was metadata commit, else update metadata for this completed instant + if (COMPLETED.equals(instant.getState())) { + String instantTime = instant.getTimestamp(); + Option metadataInstant = metadataMetaClient.reloadActiveTimeline() + .filterCompletedInstants().filter(i -> i.getTimestamp().equals(instantTime)).firstInstant(); + if (metadataInstant.isPresent()) { + currentCaughtupInstant = instantTime; + continue; + } + try { + // we need take a lock here as inflight writer could also try to update the timeline + txnManager.beginTransaction(Option.of(instant), Option.empty()); + LOG.info("Updating metadata table for instant: " + instant); + switch (instant.getAction()) { + // TODO: see if this can be moved to metadata writer itself + case HoodieTimeline.COMMIT_ACTION: + case HoodieTimeline.DELTA_COMMIT_ACTION: + case HoodieTimeline.REPLACE_COMMIT_ACTION: + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( + table.getActiveTimeline().getInstantDetails(instant).get(), HoodieCommitMetadata.class); + // do not trigger any table service as partition is not fully built out yet + metadataWriter.update(commitMetadata, instant.getTimestamp(), false); + break; + case CLEAN_ACTION: + HoodieCleanMetadata cleanMetadata = CleanerUtils.getCleanerMetadata(table.getMetaClient(), instant); + 
metadataWriter.update(cleanMetadata, instant.getTimestamp()); + break; + case RESTORE_ACTION: + HoodieRestoreMetadata restoreMetadata = TimelineMetadataUtils.deserializeHoodieRestoreMetadata( + table.getActiveTimeline().getInstantDetails(instant).get()); + metadataWriter.update(restoreMetadata, instant.getTimestamp()); + break; + case ROLLBACK_ACTION: + HoodieRollbackMetadata rollbackMetadata = TimelineMetadataUtils.deserializeHoodieRollbackMetadata( + table.getActiveTimeline().getInstantDetails(instant).get()); + metadataWriter.update(rollbackMetadata, instant.getTimestamp()); + break; + default: + throw new IllegalStateException("Unexpected value: " + instant.getAction()); + } + } catch (IOException e) { + throw new HoodieIndexException(String.format("Could not update metadata partition for instant: %s", instant), e); + } finally { + txnManager.endTransaction(Option.of(instant)); + } + } + } + } + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java new file mode 100644 index 0000000000000..5afebee8a1d7f --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.table.action.index; + +import org.apache.hudi.avro.model.HoodieIndexPartitionInfo; +import org.apache.hudi.avro.model.HoodieIndexPlan; +import org.apache.hudi.client.transaction.TransactionManager; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.exception.HoodieIndexException; +import org.apache.hudi.metadata.HoodieTableMetadataWriter; +import org.apache.hudi.metadata.MetadataPartitionType; +import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.table.action.BaseActionExecutor; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.model.WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL; +import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition; +import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getInflightAndCompletedMetadataPartitions; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists; + +/** + * Schedules INDEX action. + *

  • + * 1. Fetch last completed instant on data timeline. + * 2. Write the index plan to the .index.requested. + * 3. Initialize file groups for the enabled partition types within a transaction. + *
  • + */ +public class ScheduleIndexActionExecutor extends BaseActionExecutor> { + + private static final Logger LOG = LogManager.getLogger(ScheduleIndexActionExecutor.class); + private static final Integer INDEX_PLAN_VERSION_1 = 1; + private static final Integer LATEST_INDEX_PLAN_VERSION = INDEX_PLAN_VERSION_1; + + private final List partitionIndexTypes; + private final TransactionManager txnManager; + + public ScheduleIndexActionExecutor(HoodieEngineContext context, + HoodieWriteConfig config, + HoodieTable table, + String instantTime, + List partitionIndexTypes) { + super(context, config, table, instantTime); + this.partitionIndexTypes = partitionIndexTypes; + this.txnManager = new TransactionManager(config, table.getMetaClient().getFs()); + } + + @Override + public Option execute() { + validateBeforeScheduling(); + // make sure that it is idempotent, check with previously pending index operations. + Set indexesInflightOrCompleted = getInflightAndCompletedMetadataPartitions(table.getMetaClient().getTableConfig()); + Set requestedPartitions = partitionIndexTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet()); + requestedPartitions.removeAll(indexesInflightOrCompleted); + if (!requestedPartitions.isEmpty()) { + LOG.warn(String.format("Following partitions already exist or inflight: %s. 
Going to index only these partitions: %s", + indexesInflightOrCompleted, requestedPartitions)); + } else { + LOG.error("All requested index types are inflight or completed: " + partitionIndexTypes); + return Option.empty(); + } + List finalPartitionsToIndex = partitionIndexTypes.stream() + .filter(p -> requestedPartitions.contains(p.getPartitionPath())).collect(Collectors.toList()); + final HoodieInstant indexInstant = HoodieTimeline.getIndexRequestedInstant(instantTime); + try { + this.txnManager.beginTransaction(Option.of(indexInstant), Option.empty()); + // get last completed instant + Option indexUptoInstant = table.getActiveTimeline().getContiguousCompletedWriteTimeline().lastInstant(); + if (indexUptoInstant.isPresent()) { + // start initializing file groups + // in case FILES partition itself was not initialized before (i.e. metadata was never enabled), this will initialize synchronously + HoodieTableMetadataWriter metadataWriter = table.getMetadataWriter(instantTime) + .orElseThrow(() -> new HoodieIndexException(String.format("Could not get metadata writer to initialize filegroups for indexing for instant: %s", instantTime))); + metadataWriter.initializeMetadataPartitions(table.getMetaClient(), finalPartitionsToIndex, indexInstant.getTimestamp()); + + // for each partitionToIndex add that time to the plan + List indexPartitionInfos = finalPartitionsToIndex.stream() + .map(p -> new HoodieIndexPartitionInfo(LATEST_INDEX_PLAN_VERSION, p.getPartitionPath(), indexUptoInstant.get().getTimestamp())) + .collect(Collectors.toList()); + HoodieIndexPlan indexPlan = new HoodieIndexPlan(LATEST_INDEX_PLAN_VERSION, indexPartitionInfos); + // update data timeline with requested instant + table.getActiveTimeline().saveToPendingIndexAction(indexInstant, TimelineMetadataUtils.serializeIndexPlan(indexPlan)); + return Option.of(indexPlan); + } + } catch (IOException e) { + LOG.error("Could not initialize file groups", e); + // abort gracefully + abort(indexInstant); + throw new 
HoodieIOException(e.getMessage(), e); + } finally { + this.txnManager.endTransaction(Option.of(indexInstant)); + } + + return Option.empty(); + } + + private void validateBeforeScheduling() { + if (!EnumSet.allOf(MetadataPartitionType.class).containsAll(partitionIndexTypes)) { + throw new HoodieIndexException("Not all index types are valid: " + partitionIndexTypes); + } + // ensure lock provider configured + if (!config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl() || StringUtils.isNullOrEmpty(config.getLockProviderClass())) { + throw new HoodieIndexException(String.format("Need to set %s as %s and configure lock provider class", + WRITE_CONCURRENCY_MODE.key(), OPTIMISTIC_CONCURRENCY_CONTROL.name())); + } + } + + private void abort(HoodieInstant indexInstant) { + // delete metadata partition + partitionIndexTypes.forEach(partitionType -> { + if (metadataPartitionExists(table.getMetaClient().getBasePath(), context, partitionType)) { + deleteMetadataPartition(table.getMetaClient().getBasePath(), context, partitionType); + } + }); + // delete requested instant + table.getMetaClient().reloadActiveTimeline().deleteInstantFileIfExists(indexInstant); + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java index 9025623e86916..1fac279f8ec2c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java @@ -136,6 +136,9 @@ private HoodieRestoreMetadata finishRestore(Map HoodieActiveTimeline.GREATER_THAN.test(instant.getTimestamp(), restoreInstantTime)) .collect(Collectors.toList()); instantsToRollback.forEach(entry -> { + if (entry.isCompleted()) { + 
table.getActiveTimeline().deleteCompletedRollback(entry); + } table.getActiveTimeline().deletePending(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.ROLLBACK_ACTION, entry.getTimestamp())); table.getActiveTimeline().deletePending(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.ROLLBACK_ACTION, entry.getTimestamp())); }); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java index 9d5895de83b17..8e34f0fe59dac 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java @@ -61,13 +61,15 @@ public abstract class BaseRollbackActionExecutor table, - String instantTime, - HoodieInstant instantToRollback, - boolean deleteInstants, - boolean skipLocking) { + HoodieWriteConfig config, + HoodieTable table, + String instantTime, + HoodieInstant instantToRollback, + boolean deleteInstants, + boolean skipLocking) { this(context, config, table, instantTime, instantToRollback, deleteInstants, false, config.shouldRollbackUsingMarkers(), skipLocking); } @@ -83,6 +85,7 @@ public BaseRollbackActionExecutor(HoodieEngineContext context, boolean skipLocking) { super(context, config, table, instantTime); this.instantToRollback = instantToRollback; + this.resolvedInstant = instantToRollback; this.deleteInstants = deleteInstants; this.skipTimelinePublish = skipTimelinePublish; this.useMarkerBasedStrategy = useMarkerBasedStrategy; @@ -118,9 +121,7 @@ private HoodieRollbackMetadata runRollback(HoodieTable table, Hoodie Option.of(rollbackTimer.endTimer()), Collections.singletonList(instantToRollback), stats); - if (!skipTimelinePublish) { - finishRollback(inflightInstant, 
rollbackMetadata); - } + finishRollback(inflightInstant, rollbackMetadata); // Finally, remove the markers post rollback. WriteMarkersFactory.get(config.getMarkersType(), table, instantToRollback.getTimestamp()) @@ -185,7 +186,12 @@ private void validateRollbackCommitSequence() { } } - List inflights = inflightAndRequestedCommitTimeline.getInstants().map(HoodieInstant::getTimestamp) + List inflights = inflightAndRequestedCommitTimeline.getInstants().filter(instant -> { + if (!instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) { + return true; + } + return !ClusteringUtils.isPendingClusteringInstant(table.getMetaClient(), instant); + }).map(HoodieInstant::getTimestamp) .collect(Collectors.toList()); if ((instantTimeToRollback != null) && !inflights.isEmpty() && (inflights.indexOf(instantTimeToRollback) != inflights.size() - 1)) { @@ -237,18 +243,32 @@ protected List executeRollback(HoodieInstant instantToRollba } protected void finishRollback(HoodieInstant inflightInstant, HoodieRollbackMetadata rollbackMetadata) throws HoodieIOException { + boolean enableLocking = (!skipLocking && !skipTimelinePublish); try { - if (!skipLocking) { + if (enableLocking) { this.txnManager.beginTransaction(Option.empty(), Option.empty()); } - writeTableMetadata(rollbackMetadata); - table.getActiveTimeline().transitionRollbackInflightToComplete(inflightInstant, - TimelineMetadataUtils.serializeRollbackMetadata(rollbackMetadata)); - LOG.info("Rollback of Commits " + rollbackMetadata.getCommitsRollback() + " is complete"); + + // If publish the rollback to the timeline, we first write the rollback metadata + // to metadata table + if (!skipTimelinePublish) { + writeTableMetadata(rollbackMetadata); + } + + // Then we delete the inflight instant in the data table timeline if enabled + deleteInflightAndRequestedInstant(deleteInstants, table.getActiveTimeline(), resolvedInstant); + + // If publish the rollback to the timeline, we finally transition the inflight rollback + // to 
complete in the data table timeline + if (!skipTimelinePublish) { + table.getActiveTimeline().transitionRollbackInflightToComplete(inflightInstant, + TimelineMetadataUtils.serializeRollbackMetadata(rollbackMetadata)); + LOG.info("Rollback of Commits " + rollbackMetadata.getCommitsRollback() + " is complete"); + } } catch (IOException e) { throw new HoodieIOException("Error executing rollback at instant " + instantTime, e); } finally { - if (!skipLocking) { + if (enableLocking) { this.txnManager.endTransaction(Option.empty()); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackHelper.java index 189de373d92d7..8475afe16eea0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackHelper.java @@ -20,7 +20,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; import org.apache.hudi.avro.model.HoodieRollbackRequest; import org.apache.hudi.common.HoodieRollbackStat; import org.apache.hudi.common.engine.HoodieEngineContext; @@ -214,8 +213,4 @@ protected Map generateHeader(String c String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal())); return header; } - - public interface SerializablePathFilter extends PathFilter, Serializable { - - } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/CopyOnWriteRollbackActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/CopyOnWriteRollbackActionExecutor.java index 5e11354303f26..5315ce713eef3 100644 --- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/CopyOnWriteRollbackActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/CopyOnWriteRollbackActionExecutor.java @@ -67,7 +67,6 @@ protected List executeRollback(HoodieRollbackPlan hoodieRoll List stats = new ArrayList<>(); HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); - HoodieInstant resolvedInstant = instantToRollback; if (instantToRollback.isCompleted()) { LOG.info("Unpublishing instant " + instantToRollback); @@ -86,8 +85,6 @@ protected List executeRollback(HoodieRollbackPlan hoodieRoll dropBootstrapIndexIfNeeded(instantToRollback); - // Delete Inflight instant if enabled - deleteInflightAndRequestedInstant(deleteInstants, activeTimeline, resolvedInstant); LOG.info("Time(in ms) taken to finish rollback " + rollbackTimer.endTimer()); return stats; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java deleted file mode 100644 index 628b2fc3720f8..0000000000000 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hudi.table.action.rollback; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hudi.avro.model.HoodieRollbackRequest; -import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.model.HoodieWriteStat; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import static org.apache.hudi.table.action.rollback.BaseRollbackHelper.EMPTY_STRING; - -/** - * Performs Rollback of Hoodie Tables. - */ -public class ListingBasedRollbackHelper implements Serializable { - private static final Logger LOG = LogManager.getLogger(ListingBasedRollbackHelper.class); - - private final HoodieTableMetaClient metaClient; - private final HoodieWriteConfig config; - - public ListingBasedRollbackHelper(HoodieTableMetaClient metaClient, HoodieWriteConfig config) { - this.metaClient = metaClient; - this.config = config; - } - - /** - * Collects info for Rollback plan. 
- */ - public List getRollbackRequestsForRollbackPlan(HoodieEngineContext context, HoodieInstant instantToRollback, List rollbackRequests) { - int sparkPartitions = Math.max(Math.min(rollbackRequests.size(), config.getRollbackParallelism()), 1); - context.setJobStatus(this.getClass().getSimpleName(), "Creating Rollback Plan"); - return getListingBasedRollbackRequests(context, instantToRollback, rollbackRequests, sparkPartitions); - } - - /** - * May be delete interested files and collect stats or collect stats only. - * - * @param context instance of {@link HoodieEngineContext} to use. - * @param instantToRollback {@link HoodieInstant} of interest for which deletion or collect stats is requested. - * @param rollbackRequests List of {@link ListingBasedRollbackRequest} to be operated on. - * @param numPartitions number of spark partitions to use for parallelism. - * @return stats collected with or w/o actual deletions. - */ - private List getListingBasedRollbackRequests(HoodieEngineContext context, HoodieInstant instantToRollback, - List rollbackRequests, int numPartitions) { - return context.map(rollbackRequests, rollbackRequest -> { - switch (rollbackRequest.getType()) { - case DELETE_DATA_FILES_ONLY: { - final FileStatus[] filesToDeletedStatus = getBaseFilesToBeDeleted(metaClient, config, instantToRollback.getTimestamp(), - rollbackRequest.getPartitionPath(), metaClient.getFs()); - List filesToBeDeleted = Arrays.stream(filesToDeletedStatus).map(fileStatus -> { - String fileToBeDeleted = fileStatus.getPath().toString(); - // strip scheme - return fileToBeDeleted.substring(fileToBeDeleted.indexOf(":") + 1); - }).collect(Collectors.toList()); - return new HoodieRollbackRequest(rollbackRequest.getPartitionPath(), - EMPTY_STRING, EMPTY_STRING, filesToBeDeleted, Collections.EMPTY_MAP); - } - case DELETE_DATA_AND_LOG_FILES: { - final FileStatus[] filesToDeletedStatus = getBaseAndLogFilesToBeDeleted(instantToRollback.getTimestamp(), rollbackRequest.getPartitionPath(), 
metaClient.getFs()); - List filesToBeDeleted = Arrays.stream(filesToDeletedStatus).map(fileStatus -> { - String fileToBeDeleted = fileStatus.getPath().toString(); - // strip scheme - return fileToBeDeleted.substring(fileToBeDeleted.indexOf(":") + 1); - }).collect(Collectors.toList()); - return new HoodieRollbackRequest(rollbackRequest.getPartitionPath(), EMPTY_STRING, EMPTY_STRING, filesToBeDeleted, Collections.EMPTY_MAP); - } - case APPEND_ROLLBACK_BLOCK: { - String fileId = rollbackRequest.getFileId().get(); - String latestBaseInstant = rollbackRequest.getLatestBaseInstant().get(); - HoodieWriteStat writeStat = rollbackRequest.getWriteStat().get(); - - Path fullLogFilePath = FSUtils.getPartitionPath(config.getBasePath(), writeStat.getPath()); - - Map logFilesWithBlocksToRollback = - Collections.singletonMap(fullLogFilePath.toString(), writeStat.getTotalWriteBytes()); - - return new HoodieRollbackRequest(rollbackRequest.getPartitionPath(), fileId, latestBaseInstant, - Collections.EMPTY_LIST, logFilesWithBlocksToRollback); - } - default: - throw new IllegalStateException("Unknown Rollback action " + rollbackRequest); - } - }, numPartitions); - } - - private FileStatus[] getBaseFilesToBeDeleted(HoodieTableMetaClient metaClient, HoodieWriteConfig config, - String commit, String partitionPath, FileSystem fs) throws IOException { - LOG.info("Collecting files to be cleaned/rolledback up for path " + partitionPath + " and commit " + commit); - String basefileExtension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension(); - PathFilter filter = (path) -> { - if (path.toString().contains(basefileExtension)) { - String fileCommitTime = FSUtils.getCommitTime(path.getName()); - return commit.equals(fileCommitTime); - } - return false; - }; - return fs.listStatus(FSUtils.getPartitionPath(config.getBasePath(), partitionPath), filter); - } - - private FileStatus[] getBaseAndLogFilesToBeDeleted(String commit, String partitionPath, FileSystem fs) throws IOException 
{ - String basefileExtension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension(); - BaseRollbackHelper.SerializablePathFilter filter = (path) -> { - if (path.toString().endsWith(basefileExtension)) { - String fileCommitTime = FSUtils.getCommitTime(path.getName()); - return commit.equals(fileCommitTime); - } else if (FSUtils.isLogFile(path)) { - // Since the baseCommitTime is the only commit for new log files, it's okay here - String fileCommitTime = FSUtils.getBaseCommitTimeFromLogPath(path); - return commit.equals(fileCommitTime); - } - return false; - }; - return fs.listStatus(FSUtils.getPartitionPath(config.getBasePath(), partitionPath), filter); - } -} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java index e6355526e5233..ed37798607bd8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java @@ -18,19 +18,42 @@ package org.apache.hudi.table.action.rollback; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hudi.avro.model.HoodieRollbackRequest; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.fs.HoodieWrapperFileSystem; +import org.apache.hudi.common.model.FileSlice; +import 
org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieRollbackException; import org.apache.hudi.table.HoodieTable; - import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import org.jetbrains.annotations.NotNull; -import java.io.IOException; -import java.util.List; +import static org.apache.hudi.client.utils.MetadataConversionUtils.getHoodieCommitMetadata; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; +import static org.apache.hudi.table.action.rollback.BaseRollbackHelper.EMPTY_STRING; /** * Listing based rollback strategy to fetch list of {@link HoodieRollbackRequest}s. 
@@ -39,12 +62,15 @@ public class ListingBasedRollbackStrategy implements BaseRollbackPlanActionExecu private static final Logger LOG = LogManager.getLogger(ListingBasedRollbackStrategy.class); - protected final HoodieTable table; - protected final HoodieEngineContext context; + protected final HoodieTable table; + + protected final transient HoodieEngineContext context; + protected final HoodieWriteConfig config; + protected final String instantTime; - public ListingBasedRollbackStrategy(HoodieTable table, + public ListingBasedRollbackStrategy(HoodieTable table, HoodieEngineContext context, HoodieWriteConfig config, String instantTime) { @@ -57,20 +83,260 @@ public ListingBasedRollbackStrategy(HoodieTable table, @Override public List getRollbackRequests(HoodieInstant instantToRollback) { try { - List rollbackRequests = null; - if (table.getMetaClient().getTableType() == HoodieTableType.COPY_ON_WRITE) { - rollbackRequests = RollbackUtils.generateRollbackRequestsByListingCOW(context, - table.getMetaClient().getBasePath()); - } else { - rollbackRequests = RollbackUtils - .generateRollbackRequestsUsingFileListingMOR(instantToRollback, table, context); - } - List listingBasedRollbackRequests = new ListingBasedRollbackHelper(table.getMetaClient(), config) - .getRollbackRequestsForRollbackPlan(context, instantToRollback, rollbackRequests); - return listingBasedRollbackRequests; - } catch (IOException e) { + HoodieTableMetaClient metaClient = table.getMetaClient(); + List partitionPaths = + FSUtils.getAllPartitionPaths(context, table.getMetaClient().getBasePath(), false, false); + int numPartitions = Math.max(Math.min(partitionPaths.size(), config.getRollbackParallelism()), 1); + + context.setJobStatus(this.getClass().getSimpleName(), "Creating Listing Rollback Plan"); + + HoodieTableType tableType = table.getMetaClient().getTableType(); + String baseFileExtension = getBaseFileExtension(metaClient); + Option commitMetadataOptional = getHoodieCommitMetadata(metaClient, 
instantToRollback); + Boolean isCommitMetadataCompleted = checkCommitMetadataCompleted(instantToRollback, commitMetadataOptional); + + return context.flatMap(partitionPaths, partitionPath -> { + List hoodieRollbackRequests = new ArrayList<>(partitionPaths.size()); + FileStatus[] filesToDelete = + fetchFilesFromInstant(instantToRollback, partitionPath, metaClient.getBasePath(), baseFileExtension, + metaClient.getFs(), commitMetadataOptional, isCommitMetadataCompleted); + + if (HoodieTableType.COPY_ON_WRITE == tableType) { + hoodieRollbackRequests.add(getHoodieRollbackRequest(partitionPath, filesToDelete)); + } else if (HoodieTableType.MERGE_ON_READ == tableType) { + String commit = instantToRollback.getTimestamp(); + HoodieActiveTimeline activeTimeline = table.getMetaClient().reloadActiveTimeline(); + switch (instantToRollback.getAction()) { + case HoodieTimeline.COMMIT_ACTION: + case HoodieTimeline.REPLACE_COMMIT_ACTION: + hoodieRollbackRequests.add(getHoodieRollbackRequest(partitionPath, filesToDelete)); + break; + case HoodieTimeline.COMPACTION_ACTION: + // If there is no delta commit present after the current commit (if compaction), no action, else we + // need to make sure that a compaction commit rollback also deletes any log files written as part of the + // succeeding deltacommit. + boolean higherDeltaCommits = + !activeTimeline.getDeltaCommitTimeline().filterCompletedInstants().findInstantsAfter(commit, 1) + .empty(); + if (higherDeltaCommits) { + // Rollback of a compaction action with no higher deltacommit means that the compaction is scheduled + // and has not yet finished. In this scenario we should delete only the newly created base files + // and not corresponding base commit log files created with this as baseCommit since updates would + // have been written to the log files. 
+ hoodieRollbackRequests.add(getHoodieRollbackRequest(partitionPath, + listFilesToBeDeleted(instantToRollback.getTimestamp(), baseFileExtension, partitionPath, + metaClient.getFs()))); + } else { + // No deltacommits present after this compaction commit (inflight or requested). In this case, we + // can also delete any log files that were created with this compaction commit as base + // commit. + hoodieRollbackRequests.add(getHoodieRollbackRequest(partitionPath, filesToDelete)); + } + break; + case HoodieTimeline.DELTA_COMMIT_ACTION: + // -------------------------------------------------------------------------------------------------- + // (A) The following cases are possible if index.canIndexLogFiles and/or index.isGlobal + // -------------------------------------------------------------------------------------------------- + // (A.1) Failed first commit - Inserts were written to log files and HoodieWriteStat has no entries. In + // this scenario we would want to delete these log files. + // (A.2) Failed recurring commit - Inserts/Updates written to log files. In this scenario, + // HoodieWriteStat will have the baseCommitTime for the first log file written, add rollback blocks. + // (A.3) Rollback triggered for first commit - Inserts were written to the log files but the commit is + // being reverted. In this scenario, HoodieWriteStat will be `null` for the attribute prevCommitTime and + // and hence will end up deleting these log files. This is done so there are no orphan log files + // lying around. 
+ // (A.4) Rollback triggered for recurring commits - Inserts/Updates are being rolled back, the actions + // taken in this scenario is a combination of (A.2) and (A.3) + // --------------------------------------------------------------------------------------------------- + // (B) The following cases are possible if !index.canIndexLogFiles and/or !index.isGlobal + // --------------------------------------------------------------------------------------------------- + // (B.1) Failed first commit - Inserts were written to base files and HoodieWriteStat has no entries. + // In this scenario, we delete all the base files written for the failed commit. + // (B.2) Failed recurring commits - Inserts were written to base files and updates to log files. In + // this scenario, perform (A.1) and for updates written to log files, write rollback blocks. + // (B.3) Rollback triggered for first commit - Same as (B.1) + // (B.4) Rollback triggered for recurring commits - Same as (B.2) plus we need to delete the log files + // as well if the base base file gets deleted. 
+ HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( + table.getMetaClient().getCommitTimeline().getInstantDetails(instantToRollback).get(), + HoodieCommitMetadata.class); + + // In case all data was inserts and the commit failed, delete the file belonging to that commit + // We do not know fileIds for inserts (first inserts are either log files or base files), + // delete all files for the corresponding failed commit, if present (same as COW) + hoodieRollbackRequests.add(getHoodieRollbackRequest(partitionPath, filesToDelete)); + + // append rollback blocks for updates and inserts as A.2 and B.2 + if (commitMetadata.getPartitionToWriteStats().containsKey(partitionPath)) { + hoodieRollbackRequests.addAll( + getRollbackRequestToAppend(partitionPath, instantToRollback, commitMetadata, table)); + } + break; + default: + throw new HoodieRollbackException("Unknown listing type, during rollback of " + instantToRollback); + } + } else { + throw new HoodieRollbackException( + String.format("Unsupported table type: %s, during listing rollback of %s", tableType, instantToRollback)); + } + return hoodieRollbackRequests.stream(); + }, numPartitions); + } catch (Exception e) { LOG.error("Generating rollback requests failed for " + instantToRollback.getTimestamp(), e); throw new HoodieRollbackException("Generating rollback requests failed for " + instantToRollback.getTimestamp(), e); } } + + private String getBaseFileExtension(HoodieTableMetaClient metaClient) { + return metaClient.getTableConfig().getBaseFileFormat().getFileExtension(); + } + + @NotNull + private HoodieRollbackRequest getHoodieRollbackRequest(String partitionPath, FileStatus[] filesToDeletedStatus) { + List filesToDelete = getFilesToBeDeleted(filesToDeletedStatus); + return new HoodieRollbackRequest( + partitionPath, EMPTY_STRING, EMPTY_STRING, filesToDelete, Collections.emptyMap()); + } + + @NotNull + private List getFilesToBeDeleted(FileStatus[] dataFilesToDeletedStatus) { + return 
Arrays.stream(dataFilesToDeletedStatus).map(fileStatus -> { + String dataFileToBeDeleted = fileStatus.getPath().toString(); + // strip scheme E.g: file:/var/folders + return dataFileToBeDeleted.substring(dataFileToBeDeleted.indexOf(":") + 1); + }).collect(Collectors.toList()); + } + + private FileStatus[] listFilesToBeDeleted(String commit, String basefileExtension, String partitionPath, + FileSystem fs) throws IOException { + LOG.info("Collecting files to be cleaned/rolledback up for path " + partitionPath + " and commit " + commit); + PathFilter filter = (path) -> { + if (path.toString().contains(basefileExtension)) { + String fileCommitTime = FSUtils.getCommitTime(path.getName()); + return commit.equals(fileCommitTime); + } + return false; + }; + return fs.listStatus(FSUtils.getPartitionPath(config.getBasePath(), partitionPath), filter); + } + + private FileStatus[] fetchFilesFromInstant(HoodieInstant instantToRollback, String partitionPath, String basePath, + String baseFileExtension, HoodieWrapperFileSystem fs, + Option commitMetadataOptional, + Boolean isCommitMetadataCompleted) throws IOException { + if (isCommitMetadataCompleted) { + return fetchFilesFromCommitMetadata(instantToRollback, partitionPath, basePath, commitMetadataOptional.get(), + baseFileExtension, fs); + } else { + return fetchFilesFromListFiles(instantToRollback, partitionPath, basePath, baseFileExtension, fs); + } + } + + private FileStatus[] fetchFilesFromCommitMetadata(HoodieInstant instantToRollback, String partitionPath, + String basePath, HoodieCommitMetadata commitMetadata, + String baseFileExtension, HoodieWrapperFileSystem fs) + throws IOException { + SerializablePathFilter pathFilter = getSerializablePathFilter(baseFileExtension, instantToRollback.getTimestamp()); + Path[] filePaths = getFilesFromCommitMetadata(basePath, commitMetadata, partitionPath); + + return fs.listStatus(filePaths, pathFilter); + } + + private FileStatus[] fetchFilesFromListFiles(HoodieInstant 
instantToRollback, String partitionPath, String basePath, + String baseFileExtension, HoodieWrapperFileSystem fs) + throws IOException { + SerializablePathFilter pathFilter = getSerializablePathFilter(baseFileExtension, instantToRollback.getTimestamp()); + Path[] filePaths = listFilesToBeDeleted(basePath, partitionPath); + + return fs.listStatus(filePaths, pathFilter); + } + + private Boolean checkCommitMetadataCompleted(HoodieInstant instantToRollback, + Option commitMetadataOptional) { + return commitMetadataOptional.isPresent() && instantToRollback.isCompleted() + && !WriteOperationType.UNKNOWN.equals(commitMetadataOptional.get().getOperationType()); + } + + private static Path[] listFilesToBeDeleted(String basePath, String partitionPath) { + return new Path[] {FSUtils.getPartitionPath(basePath, partitionPath)}; + } + + private static Path[] getFilesFromCommitMetadata(String basePath, HoodieCommitMetadata commitMetadata, String partitionPath) { + List fullPaths = commitMetadata.getFullPathsByPartitionPath(basePath, partitionPath); + return fullPaths.stream().map(Path::new).toArray(Path[]::new); + } + + @NotNull + private static SerializablePathFilter getSerializablePathFilter(String basefileExtension, String commit) { + return (path) -> { + if (path.toString().endsWith(basefileExtension)) { + String fileCommitTime = FSUtils.getCommitTime(path.getName()); + return commit.equals(fileCommitTime); + } else if (FSUtils.isLogFile(path)) { + // Since the baseCommitTime is the only commit for new log files, it's okay here + String fileCommitTime = FSUtils.getBaseCommitTimeFromLogPath(path); + return commit.equals(fileCommitTime); + } + return false; + }; + } + + public static List getRollbackRequestToAppend(String partitionPath, HoodieInstant rollbackInstant, + HoodieCommitMetadata commitMetadata, HoodieTable table) { + List hoodieRollbackRequests = new ArrayList<>(); + checkArgument(rollbackInstant.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION)); + + // 
wStat.getPrevCommit() might not give the right commit time in the following + // scenario : If a compaction was scheduled, the new commitTime associated with the requested compaction will be + // used to write the new log files. In this case, the commit time for the log file is the compaction requested time. + // But the index (global) might store the baseCommit of the base and not the requested, hence get the + // baseCommit always by listing the file slice + // With multi writers, rollbacks could be lazy. and so we need to use getLatestFileSlicesBeforeOrOn() instead of getLatestFileSlices() + Map latestFileSlices = table.getSliceView() + .getLatestFileSlicesBeforeOrOn(partitionPath, rollbackInstant.getTimestamp(), true) + .collect(Collectors.toMap(FileSlice::getFileId, Function.identity())); + + List hoodieWriteStats = commitMetadata.getPartitionToWriteStats().get(partitionPath) + .stream() + .filter(writeStat -> { + // Filter out stats without prevCommit since they are all inserts + boolean validForRollback = (writeStat != null) && (!writeStat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) + && (writeStat.getPrevCommit() != null) && latestFileSlices.containsKey(writeStat.getFileId()); + + if (!validForRollback) { + return false; + } + + FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId()); + + // For sanity, log-file base-instant time can never be less than base-commit on which we are rolling back + checkArgument( + HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), + HoodieTimeline.LESSER_THAN_OR_EQUALS, rollbackInstant.getTimestamp()), + "Log-file base-instant could not be less than the instant being rolled back"); + + // Command block "rolling back" the preceding block {@link HoodieCommandBlockTypeEnum#ROLLBACK_PREVIOUS_BLOCK} + // w/in the latest file-slice is appended iff base-instant of the log-file is _strictly_ less + // than the instant of the Delta Commit being rolled back. 
Otherwise, log-file will be cleaned up + // in a different branch of the flow. + return HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), HoodieTimeline.LESSER_THAN, rollbackInstant.getTimestamp()); + }) + .collect(Collectors.toList()); + + for (HoodieWriteStat writeStat : hoodieWriteStats) { + FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId()); + String fileId = writeStat.getFileId(); + String latestBaseInstant = latestFileSlice.getBaseInstantTime(); + + Path fullLogFilePath = FSUtils.getPartitionPath(table.getConfig().getBasePath(), writeStat.getPath()); + + Map logFilesWithBlocksToRollback = + Collections.singletonMap(fullLogFilePath.toString(), writeStat.getTotalWriteBytes()); + + hoodieRollbackRequests.add(new HoodieRollbackRequest(partitionPath, fileId, latestBaseInstant, + Collections.emptyList(), logFilesWithBlocksToRollback)); + } + + return hoodieRollbackRequests; + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MergeOnReadRollbackActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MergeOnReadRollbackActionExecutor.java index c2b25ffc5bf5a..e4054e9221969 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MergeOnReadRollbackActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MergeOnReadRollbackActionExecutor.java @@ -67,7 +67,6 @@ protected List executeRollback(HoodieRollbackPlan hoodieRoll LOG.info("Rolling back instant " + instantToRollback); - HoodieInstant resolvedInstant = instantToRollback; // Atomically un-publish all non-inflight commits if (instantToRollback.isCompleted()) { LOG.info("Un-publishing instant " + instantToRollback + ", deleteInstants=" + deleteInstants); @@ -93,8 +92,6 @@ protected List executeRollback(HoodieRollbackPlan hoodieRoll dropBootstrapIndexIfNeeded(resolvedInstant); - // Delete 
Inflight instants if enabled - deleteInflightAndRequestedInstant(deleteInstants, table.getActiveTimeline(), resolvedInstant); LOG.info("Time(in ms) taken to finish rollback " + rollbackTimer.endTimer()); return allRollbackStats; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java index 2bc9b59b0d1f1..ce7a18515137b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java @@ -21,21 +21,13 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.common.HoodieRollbackStat; -import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.model.FileSlice; -import org.apache.hudi.common.model.HoodieCommitMetadata; -import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.block.HoodieCommandBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock; -import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.util.Option; -import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.table.HoodieTable; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -44,9 +36,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Objects; -import java.util.function.Function; -import java.util.stream.Collectors; import static 
org.apache.hudi.common.util.ValidationUtils.checkArgument; @@ -102,160 +91,4 @@ static HoodieRollbackStat mergeRollbackStat(HoodieRollbackStat stat1, HoodieRoll return new HoodieRollbackStat(stat1.getPartitionPath(), successDeleteFiles, failedDeleteFiles, commandBlocksCount); } - /** - * Generate all rollback requests that needs rolling back this action without actually performing rollback for COW table type. - * @param engineContext instance of {@link HoodieEngineContext} to use. - * @param basePath base path of interest. - * @return {@link List} of {@link ListingBasedRollbackRequest}s thus collected. - */ - public static List generateRollbackRequestsByListingCOW(HoodieEngineContext engineContext, String basePath) { - return FSUtils.getAllPartitionPaths(engineContext, basePath, false, false).stream() - .map(ListingBasedRollbackRequest::createRollbackRequestWithDeleteDataAndLogFilesAction) - .collect(Collectors.toList()); - } - - /** - * Generate all rollback requests that we need to perform for rolling back this action without actually performing rolling back for MOR table type. - * - * @param instantToRollback Instant to Rollback - * @param table instance of {@link HoodieTable} to use. - * @param context instance of {@link HoodieEngineContext} to use. 
- * @return list of rollback requests - */ - public static List generateRollbackRequestsUsingFileListingMOR(HoodieInstant instantToRollback, HoodieTable table, HoodieEngineContext context) throws IOException { - String commit = instantToRollback.getTimestamp(); - HoodieWriteConfig config = table.getConfig(); - List partitions = FSUtils.getAllPartitionPaths(context, table.getMetaClient().getBasePath(), false, false); - if (partitions.isEmpty()) { - return new ArrayList<>(); - } - int sparkPartitions = Math.max(Math.min(partitions.size(), config.getRollbackParallelism()), 1); - context.setJobStatus(RollbackUtils.class.getSimpleName(), "Generate all rollback requests"); - return context.flatMap(partitions, partitionPath -> { - HoodieActiveTimeline activeTimeline = table.getMetaClient().reloadActiveTimeline(); - List partitionRollbackRequests = new ArrayList<>(); - switch (instantToRollback.getAction()) { - case HoodieTimeline.COMMIT_ACTION: - case HoodieTimeline.REPLACE_COMMIT_ACTION: - LOG.info("Rolling back commit action."); - partitionRollbackRequests.add( - ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath)); - break; - case HoodieTimeline.COMPACTION_ACTION: - // If there is no delta commit present after the current commit (if compaction), no action, else we - // need to make sure that a compaction commit rollback also deletes any log files written as part of the - // succeeding deltacommit. - boolean higherDeltaCommits = - !activeTimeline.getDeltaCommitTimeline().filterCompletedInstants().findInstantsAfter(commit, 1).empty(); - if (higherDeltaCommits) { - // Rollback of a compaction action with no higher deltacommit means that the compaction is scheduled - // and has not yet finished. In this scenario we should delete only the newly created base files - // and not corresponding base commit log files created with this as baseCommit since updates would - // have been written to the log files. 
- LOG.info("Rolling back compaction. There are higher delta commits. So only deleting data files"); - partitionRollbackRequests.add( - ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataFilesOnlyAction(partitionPath)); - } else { - // No deltacommits present after this compaction commit (inflight or requested). In this case, we - // can also delete any log files that were created with this compaction commit as base - // commit. - LOG.info("Rolling back compaction plan. There are NO higher delta commits. So deleting both data and" - + " log files"); - partitionRollbackRequests.add( - ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath)); - } - break; - case HoodieTimeline.DELTA_COMMIT_ACTION: - // -------------------------------------------------------------------------------------------------- - // (A) The following cases are possible if index.canIndexLogFiles and/or index.isGlobal - // -------------------------------------------------------------------------------------------------- - // (A.1) Failed first commit - Inserts were written to log files and HoodieWriteStat has no entries. In - // this scenario we would want to delete these log files. - // (A.2) Failed recurring commit - Inserts/Updates written to log files. In this scenario, - // HoodieWriteStat will have the baseCommitTime for the first log file written, add rollback blocks. - // (A.3) Rollback triggered for first commit - Inserts were written to the log files but the commit is - // being reverted. In this scenario, HoodieWriteStat will be `null` for the attribute prevCommitTime and - // and hence will end up deleting these log files. This is done so there are no orphan log files - // lying around. 
- // (A.4) Rollback triggered for recurring commits - Inserts/Updates are being rolled back, the actions - // taken in this scenario is a combination of (A.2) and (A.3) - // --------------------------------------------------------------------------------------------------- - // (B) The following cases are possible if !index.canIndexLogFiles and/or !index.isGlobal - // --------------------------------------------------------------------------------------------------- - // (B.1) Failed first commit - Inserts were written to base files and HoodieWriteStat has no entries. - // In this scenario, we delete all the base files written for the failed commit. - // (B.2) Failed recurring commits - Inserts were written to base files and updates to log files. In - // this scenario, perform (A.1) and for updates written to log files, write rollback blocks. - // (B.3) Rollback triggered for first commit - Same as (B.1) - // (B.4) Rollback triggered for recurring commits - Same as (B.2) plus we need to delete the log files - // as well if the base base file gets deleted. 
- HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( - table.getMetaClient().getCommitTimeline().getInstantDetails(instantToRollback).get(), - HoodieCommitMetadata.class); - - // In case all data was inserts and the commit failed, delete the file belonging to that commit - // We do not know fileIds for inserts (first inserts are either log files or base files), - // delete all files for the corresponding failed commit, if present (same as COW) - partitionRollbackRequests.add( - ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath)); - - // append rollback blocks for updates and inserts as A.2 and B.2 - if (commitMetadata.getPartitionToWriteStats().containsKey(partitionPath)) { - partitionRollbackRequests - .addAll(generateAppendRollbackBlocksAction(partitionPath, instantToRollback, commitMetadata, table)); - } - break; - default: - break; - } - return partitionRollbackRequests.stream(); - }, Math.min(partitions.size(), sparkPartitions)).stream().filter(Objects::nonNull).collect(Collectors.toList()); - } - - private static List generateAppendRollbackBlocksAction(String partitionPath, HoodieInstant rollbackInstant, - HoodieCommitMetadata commitMetadata, HoodieTable table) { - checkArgument(rollbackInstant.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION)); - - // wStat.getPrevCommit() might not give the right commit time in the following - // scenario : If a compaction was scheduled, the new commitTime associated with the requested compaction will be - // used to write the new log files. In this case, the commit time for the log file is the compaction requested time. - // But the index (global) might store the baseCommit of the base and not the requested, hence get the - // baseCommit always by listing the file slice - // With multi writers, rollbacks could be lazy. 
and so we need to use getLatestFileSlicesBeforeOrOn() instead of getLatestFileSlices() - Map latestFileSlices = table.getSliceView() - .getLatestFileSlicesBeforeOrOn(partitionPath, rollbackInstant.getTimestamp(), true) - .collect(Collectors.toMap(FileSlice::getFileId, Function.identity())); - - return commitMetadata.getPartitionToWriteStats().get(partitionPath) - .stream() - .filter(writeStat -> { - // Filter out stats without prevCommit since they are all inserts - boolean validForRollback = (writeStat != null) && (!writeStat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) - && (writeStat.getPrevCommit() != null) && latestFileSlices.containsKey(writeStat.getFileId()); - - if (!validForRollback) { - return false; - } - - FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId()); - - // For sanity, log-file base-instant time can never be less than base-commit on which we are rolling back - checkArgument( - HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), - HoodieTimeline.LESSER_THAN_OR_EQUALS, rollbackInstant.getTimestamp()), - "Log-file base-instant could not be less than the instant being rolled back"); - - // Command block "rolling back" the preceding block {@link HoodieCommandBlockTypeEnum#ROLLBACK_PREVIOUS_BLOCK} - // w/in the latest file-slice is appended iff base-instant of the log-file is _strictly_ less - // than the instant of the Delta Commit being rolled back. Otherwise, log-file will be cleaned up - // in a different branch of the flow. 
- return HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), HoodieTimeline.LESSER_THAN, rollbackInstant.getTimestamp()); - }) - .map(writeStat -> { - FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId()); - return ListingBasedRollbackRequest.createRollbackRequestWithAppendRollbackBlockAction(partitionPath, - writeStat.getFileId(), latestFileSlice.getBaseInstantTime(), writeStat); - }) - .collect(Collectors.toList()); - } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/SerializablePathFilter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/SerializablePathFilter.java new file mode 100644 index 0000000000000..e2affdf5ca891 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/SerializablePathFilter.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.table.action.rollback; + +import org.apache.hadoop.fs.PathFilter; + +import java.io.Serializable; + +public interface SerializablePathFilter extends PathFilter, Serializable { +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToThreeDowngradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToThreeDowngradeHandler.java index 17dc01d0213e7..86a594af17c5e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToThreeDowngradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToThreeDowngradeHandler.java @@ -22,6 +22,7 @@ import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.metadata.HoodieTableMetadataUtil; import java.util.Collections; import java.util.Map; @@ -33,6 +34,11 @@ public class FourToThreeDowngradeHandler implements DowngradeHandler { @Override public Map downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) { + if (config.isMetadataTableEnabled()) { + // Metadata Table in version 4 has a schema that is not forward compatible. + // Hence, it is safe to delete the metadata table, which will be re-initialized in subsequent commit. 
+ HoodieTableMetadataUtil.deleteMetadataTable(config.getBasePath(), context); + } return Collections.emptyMap(); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java index 72e96bb4103bc..4da675ea82004 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java @@ -23,10 +23,15 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.metadata.MetadataPartitionType; import java.util.Hashtable; import java.util.Map; +import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_CHECKSUM; +import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists; + /** * UpgradeHandler to assist in upgrading {@link org.apache.hudi.table.HoodieTable} from version 3 to 4. 
*/ @@ -35,7 +40,12 @@ public class ThreeToFourUpgradeHandler implements UpgradeHandler { @Override public Map upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) { Map tablePropsToAdd = new Hashtable<>(); - tablePropsToAdd.put(HoodieTableConfig.TABLE_CHECKSUM, String.valueOf(HoodieTableConfig.generateChecksum(config.getProps()))); + tablePropsToAdd.put(TABLE_CHECKSUM, String.valueOf(HoodieTableConfig.generateChecksum(config.getProps()))); + // if metadata is enabled and files partition exist then update TABLE_METADATA_INDEX_COMPLETED + // schema for the files partition is same between the two versions + if (config.isMetadataTableEnabled() && metadataPartitionExists(config.getBasePath(), context, MetadataPartitionType.FILES)) { + tablePropsToAdd.put(TABLE_METADATA_PARTITIONS, MetadataPartitionType.FILES.getPartitionPath()); + } return tablePropsToAdd; } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ZeroToOneUpgradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ZeroToOneUpgradeHandler.java index 6a114154c8778..42add690f29ea 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ZeroToOneUpgradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ZeroToOneUpgradeHandler.java @@ -18,14 +18,14 @@ package org.apache.hudi.table.upgrade; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.model.HoodieRollbackRequest; import org.apache.hudi.common.HoodieRollbackStat; import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.IOType; -import org.apache.hudi.common.table.HoodieTableMetaClient; import 
org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -35,15 +35,10 @@ import org.apache.hudi.exception.HoodieRollbackException; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.rollback.BaseRollbackHelper; -import org.apache.hudi.table.action.rollback.ListingBasedRollbackHelper; -import org.apache.hudi.table.action.rollback.ListingBasedRollbackRequest; -import org.apache.hudi.table.action.rollback.RollbackUtils; +import org.apache.hudi.table.action.rollback.ListingBasedRollbackStrategy; import org.apache.hudi.table.marker.WriteMarkers; import org.apache.hudi.table.marker.WriteMarkersFactory; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; - import java.util.Collections; import java.util.List; import java.util.Map; @@ -100,14 +95,7 @@ protected void recreateMarkers(final String commitInstantTime, writeMarkers.quietDeleteMarkerDir(context, parallelism); // generate rollback stats - List rollbackRequests; - if (table.getMetaClient().getTableType() == HoodieTableType.COPY_ON_WRITE) { - rollbackRequests = RollbackUtils.generateRollbackRequestsByListingCOW(context, table.getMetaClient().getBasePath()); - } else { - rollbackRequests = RollbackUtils.generateRollbackRequestsUsingFileListingMOR(commitInstantOpt.get(), table, context); - } - List rollbackStats = getListBasedRollBackStats(table.getMetaClient(), table.getConfig(), - context, commitInstantOpt, rollbackRequests); + List rollbackStats = getListBasedRollBackStats(table, context, commitInstantOpt); // recreate markers adhering to marker based rollback for (HoodieRollbackStat rollbackStat : rollbackStats) { @@ -126,12 +114,12 @@ protected void recreateMarkers(final String commitInstantTime, } } - List getListBasedRollBackStats( - HoodieTableMetaClient metaClient, HoodieWriteConfig config, HoodieEngineContext context, - Option 
commitInstantOpt, List rollbackRequests) { - List hoodieRollbackRequests = new ListingBasedRollbackHelper(metaClient, config) - .getRollbackRequestsForRollbackPlan(context, commitInstantOpt.get(), rollbackRequests); - return new BaseRollbackHelper(metaClient, config).collectRollbackStats(context, commitInstantOpt.get(), hoodieRollbackRequests); + List getListBasedRollBackStats(HoodieTable table, HoodieEngineContext context, Option commitInstantOpt) { + List hoodieRollbackRequests = + new ListingBasedRollbackStrategy(table, context, table.getConfig(), commitInstantOpt.get().getTimestamp()) + .getRollbackRequests(commitInstantOpt.get()); + return new BaseRollbackHelper(table.getMetaClient(), table.getConfig()) + .collectRollbackStats(context, commitInstantOpt.get(), hoodieRollbackRequests); } /** @@ -143,7 +131,7 @@ List getListBasedRollBackStats( * @param table {@link HoodieTable} instance to use * @return the marker file name thus curated. */ - private static String getFileNameForMarkerFromLogFile(String logFilePath, HoodieTable table) { + private static String getFileNameForMarkerFromLogFile(String logFilePath, HoodieTable table) { Path logPath = new Path(table.getMetaClient().getBasePath(), logFilePath); String fileId = FSUtils.getFileIdFromLogPath(logPath); String baseInstant = FSUtils.getBaseCommitTimeFromLogPath(logPath); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleConcurrentFileWritesConflictResolutionStrategy.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleConcurrentFileWritesConflictResolutionStrategy.java index afe8e05aa0662..e7cc296ff6ae4 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleConcurrentFileWritesConflictResolutionStrategy.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleConcurrentFileWritesConflictResolutionStrategy.java @@ -23,6 
+23,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.hudi.avro.model.HoodieClusteringGroup; @@ -31,6 +32,7 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; import org.apache.hudi.avro.model.HoodieSliceInfo; +import org.apache.hudi.client.utils.TransactionUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieReplaceCommitMetadata; import org.apache.hudi.common.model.HoodieWriteStat; @@ -40,6 +42,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant.State; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; +import org.apache.hudi.common.testutils.FileCreateUtils; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; @@ -318,16 +321,20 @@ private void createCommit(String instantTime) throws Exception { .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2); } - private HoodieCommitMetadata createCommitMetadata(String instantTime) { + private HoodieCommitMetadata createCommitMetadata(String instantTime, String writeFileName) { HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); commitMetadata.addMetadata("test", "test"); HoodieWriteStat writeStat = new HoodieWriteStat(); - writeStat.setFileId("file-1"); + writeStat.setFileId(writeFileName); commitMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat); commitMetadata.setOperationType(WriteOperationType.INSERT); return commitMetadata; } + private HoodieCommitMetadata createCommitMetadata(String instantTime) { + return 
createCommitMetadata(instantTime, "file-1"); + } + private void createInflightCommit(String instantTime) throws Exception { String fileId1 = "file-" + instantTime + "-1"; String fileId2 = "file-" + instantTime + "-2"; @@ -417,4 +424,147 @@ private void createReplace(String instantTime, WriteOperationType writeOperation .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2); } + // try to simulate HUDI-3355 + @Test + public void testConcurrentWritesWithPendingInstants() throws Exception { + // step1: create a pending replace/commit/compact instant: C1,C11,C12 + String newInstantTimeC1 = HoodieActiveTimeline.createNewInstantTime(); + createPendingReplace(newInstantTimeC1, WriteOperationType.CLUSTER); + + String newCompactionInstantTimeC11 = HoodieActiveTimeline.createNewInstantTime(); + createPendingCompaction(newCompactionInstantTimeC11); + + String newCommitInstantTimeC12 = HoodieActiveTimeline.createNewInstantTime(); + createInflightCommit(newCommitInstantTimeC12); + // step2: create a complete commit which has no conflict with C1,C11,C12, named it as C2 + createCommit(HoodieActiveTimeline.createNewInstantTime()); + HoodieActiveTimeline timeline = metaClient.getActiveTimeline(); + // consider commits before this are all successful + Option lastSuccessfulInstant = timeline.getCommitsTimeline().filterCompletedInstants().lastInstant(); + // step3: write 1 starts, which has conflict with C1,C11,C12, named it as C3 + String currentWriterInstant = HoodieActiveTimeline.createNewInstantTime(); + createInflightCommit(currentWriterInstant); + // step4: create a requested commit, which has conflict with C3, named it as C4 + String commitC4 = HoodieActiveTimeline.createNewInstantTime(); + createRequestedCommit(commitC4); + // get PendingCommit during write 1 operation + metaClient.reloadActiveTimeline(); + Set pendingInstant = TransactionUtils.getInflightAndRequestedInstants(metaClient); + pendingInstant.remove(currentWriterInstant); 
+ // step5: finished pending cluster/compaction/commit operation + createCompleteReplace(newInstantTimeC1, WriteOperationType.CLUSTER); + createCompleteCompaction(newCompactionInstantTimeC11); + createCompleteCommit(newCommitInstantTimeC12); + createCompleteCommit(commitC4); + + // step6: do check + Option currentInstant = Option.of(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, currentWriterInstant)); + SimpleConcurrentFileWritesConflictResolutionStrategy strategy = new SimpleConcurrentFileWritesConflictResolutionStrategy(); + // make sure c3 has conflict with C1,C11,C12,C4; + HoodieCommitMetadata currentMetadata = createCommitMetadata(currentWriterInstant, "file-2"); + timeline.reload(); + List completedInstantsDuringCurrentWriteOperation = TransactionUtils + .getCompletedInstantsDuringCurrentWriteOperation(metaClient, pendingInstant).collect(Collectors.toList()); + // C1,C11,C12,C4 should be included + Assertions.assertTrue(completedInstantsDuringCurrentWriteOperation.size() == 4); + + ConcurrentOperation thisCommitOperation = new ConcurrentOperation(currentInstant.get(), currentMetadata); + // check C3 has conflict with C1,C11,C12,C4 + for (HoodieInstant instant : completedInstantsDuringCurrentWriteOperation) { + ConcurrentOperation thatCommitOperation = new ConcurrentOperation(instant, metaClient); + Assertions.assertTrue(strategy.hasConflict(thisCommitOperation, thatCommitOperation)); + try { + strategy.resolveConflict(null, thisCommitOperation, thatCommitOperation); + } catch (HoodieWriteConflictException e) { + // expected + } + } + } + + private void createPendingReplace(String instantTime, WriteOperationType writeOperationType) throws Exception { + String fileId1 = "file-1"; + String fileId2 = "file-2"; + // create replace instant to mark fileId2 as deleted + HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata(); + requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.name()); + 
HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan(); + HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup(); + HoodieSliceInfo sliceInfo = new HoodieSliceInfo(); + sliceInfo.setFileId(fileId2); + sliceInfo.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH); + clusteringGroup.setSlices(Arrays.asList(sliceInfo)); + clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup)); + requestedReplaceMetadata.setClusteringPlan(clusteringPlan); + requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION); + HoodieTestTable.of(metaClient) + .addPendingReplace(instantTime, Option.of(requestedReplaceMetadata), Option.empty()) + .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2); + } + + private void createCompleteReplace(String instantTime, WriteOperationType writeOperationType) throws Exception { + String fileId1 = "file-1"; + String fileId2 = "file-2"; + + // create replace instant to mark fileId2 as deleted + HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata(); + Map> partitionFileIds = new HashMap<>(); + partitionFileIds.put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, Arrays.asList(fileId2)); + replaceMetadata.setPartitionToReplaceFileIds(partitionFileIds); + HoodieWriteStat writeStat = new HoodieWriteStat(); + writeStat.setFileId("file-2"); + replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat); + replaceMetadata.setOperationType(writeOperationType); + FileCreateUtils.createReplaceCommit(metaClient.getBasePath(), instantTime, replaceMetadata); + } + + private void createPendingCompaction(String instantTime) throws Exception { + String fileId1 = "file-2"; + HoodieCompactionPlan compactionPlan = new HoodieCompactionPlan(); + compactionPlan.setVersion(TimelineLayoutVersion.CURR_VERSION); + HoodieCompactionOperation operation = new HoodieCompactionOperation(); + operation.setFileId(fileId1); 
+ operation.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH); + operation.setDataFilePath("/file-2"); + operation.setDeltaFilePaths(Arrays.asList("/file-2")); + compactionPlan.setOperations(Arrays.asList(operation)); + HoodieTestTable.of(metaClient) + .addRequestedCompaction(instantTime, compactionPlan); + FileCreateUtils.createPendingInflightCompaction(metaClient.getBasePath(), instantTime); + } + + private void createCompleteCompaction(String instantTime) throws Exception { + String fileId1 = "file-1"; + String fileId2 = "file-2"; + + HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); + commitMetadata.addMetadata("test", "test"); + commitMetadata.setOperationType(WriteOperationType.COMPACT); + commitMetadata.setCompacted(true); + HoodieWriteStat writeStat = new HoodieWriteStat(); + writeStat.setFileId("file-2"); + commitMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat); + HoodieTestTable.of(metaClient) + .addCommit(instantTime, Option.of(commitMetadata)) + .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2); + } + + private void createRequestedCommit(String instantTime) throws Exception { + HoodieTestTable.of(metaClient) + .addInflightCommit(instantTime); + } + + private void createCompleteCommit(String instantTime) throws Exception { + String fileId1 = "file-1"; + String fileId2 = "file-2"; + + HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); + commitMetadata.addMetadata("test", "test"); + HoodieWriteStat writeStat = new HoodieWriteStat(); + writeStat.setFileId("file-2"); + commitMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat); + commitMetadata.setOperationType(WriteOperationType.INSERT); + HoodieTestTable.of(metaClient) + .addCommit(instantTime, Option.of(commitMetadata)) + .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2); + } } diff --git 
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java index 778bef7324bde..85d40964b8fd2 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java @@ -136,6 +136,7 @@ public void testAutoConcurrencyConfigAdjustmentWithTableServices(HoodieTableType put(INLINE_COMPACT.key(), "true"); put(AUTO_CLEAN.key(), "true"); put(ASYNC_CLEAN.key(), "false"); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }), true, true, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, HoodieFailedWritesCleaningPolicy.LAZY, inProcessLockProviderClassName); @@ -148,6 +149,7 @@ public void testAutoConcurrencyConfigAdjustmentWithTableServices(HoodieTableType put(INLINE_COMPACT.key(), "true"); put(AUTO_CLEAN.key(), "true"); put(ASYNC_CLEAN.key(), "true"); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }), true, true, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, HoodieFailedWritesCleaningPolicy.LAZY, inProcessLockProviderClassName); @@ -160,6 +162,7 @@ public void testAutoConcurrencyConfigAdjustmentWithTableServices(HoodieTableType put(INLINE_COMPACT.key(), "false"); put(AUTO_CLEAN.key(), "true"); put(ASYNC_CLEAN.key(), "false"); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }), true, tableType == HoodieTableType.MERGE_ON_READ, @@ -181,6 +184,7 @@ public void testAutoConcurrencyConfigAdjustmentWithTableServices(HoodieTableType put(INLINE_COMPACT.key(), "true"); put(AUTO_CLEAN.key(), "true"); put(ASYNC_CLEAN.key(), "false"); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }), Option.of(true), Option.of(false), Option.of(true), WriteConcurrencyMode.valueOf(WRITE_CONCURRENCY_MODE.defaultValue()), @@ -188,6 +192,38 @@ public void 
testAutoConcurrencyConfigAdjustmentWithTableServices(HoodieTableType HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue()); } + @ParameterizedTest + @EnumSource(HoodieTableType.class) + public void testAutoAdjustLockConfigs(HoodieTableType tableType) { + TypedProperties properties = new TypedProperties(); + properties.setProperty(HoodieTableConfig.TYPE.key(), tableType.name()); + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp") + .withAutoAdjustLockConfigs(false) + .withClusteringConfig(new HoodieClusteringConfig.Builder().withAsyncClustering(true).build()) + .withProperties(properties) + .build(); + + verifyConcurrencyControlRelatedConfigs(writeConfig, + true, true, + WriteConcurrencyMode.valueOf(WRITE_CONCURRENCY_MODE.defaultValue()), + HoodieFailedWritesCleaningPolicy.valueOf(FAILED_WRITES_CLEANER_POLICY.defaultValue()), + HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue()); + + writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp") + .withAutoAdjustLockConfigs(false) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withClusteringConfig(new HoodieClusteringConfig.Builder().withAsyncClustering(true).build()) + .withProperties(properties) + .build(); + + verifyConcurrencyControlRelatedConfigs(writeConfig, + true, true, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, HoodieFailedWritesCleaningPolicy.LAZY, + HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue()); + } + @ParameterizedTest @EnumSource(HoodieTableType.class) public void testAutoConcurrencyConfigAdjustmentWithUserConfigs(HoodieTableType tableType) { @@ -199,8 +235,10 @@ public void testAutoConcurrencyConfigAdjustmentWithUserConfigs(HoodieTableType t .withLockConfig(HoodieLockConfig.newBuilder() .withLockProvider(FileSystemBasedLockProviderTestClass.class) .build()) + .withAutoAdjustLockConfigs(true) .withProperties(properties) .build(); + verifyConcurrencyControlRelatedConfigs(writeConfig, true, tableType 
== HoodieTableType.MERGE_ON_READ, WriteConcurrencyMode.valueOf(WRITE_CONCURRENCY_MODE.defaultValue()), @@ -217,6 +255,7 @@ public void testAutoConcurrencyConfigAdjustmentWithUserConfigs(HoodieTableType t put(ASYNC_CLEAN.key(), "true"); put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), ZookeeperBasedLockProvider.class.getName()); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }), true, true, WriteConcurrencyMode.valueOf(WRITE_CONCURRENCY_MODE.defaultValue()), @@ -227,6 +266,7 @@ public void testAutoConcurrencyConfigAdjustmentWithUserConfigs(HoodieTableType t writeConfig = createWriteConfig(new HashMap() { { put(HoodieTableConfig.TYPE.key(), tableType.name()); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }); if (writeConfig.areAnyTableServicesAsync()) { @@ -252,6 +292,7 @@ public void testAutoConcurrencyConfigAdjustmentWithNoTableService(HoodieTableTyp { put(HoodieTableConfig.TYPE.key(), tableType.name()); put(TABLE_SERVICES_ENABLED.key(), "false"); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }), false, tableType == HoodieTableType.MERGE_ON_READ, WriteConcurrencyMode.fromValue(WRITE_CONCURRENCY_MODE.defaultValue()), @@ -268,6 +309,7 @@ public void testAutoConcurrencyConfigAdjustmentWithNoTableService(HoodieTableTyp WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value()); put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), FileSystemBasedLockProviderTestClass.class.getName()); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }), false, tableType == HoodieTableType.MERGE_ON_READ, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, @@ -288,6 +330,7 @@ public void testAutoConcurrencyConfigAdjustmentWithMetadataTableDisabled(HoodieT put(INLINE_COMPACT.key(), "true"); put(AUTO_CLEAN.key(), "true"); put(ASYNC_CLEAN.key(), "false"); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }), true, true, WriteConcurrencyMode.fromValue(WRITE_CONCURRENCY_MODE.defaultValue()), 
@@ -306,6 +349,7 @@ public void testAutoConcurrencyConfigAdjustmentWithMetadataTableDisabled(HoodieT WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value()); put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), FileSystemBasedLockProviderTestClass.class.getName()); + put(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key(), "true"); } }), true, true, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, HoodieFailedWritesCleaningPolicy.LAZY, FileSystemBasedLockProviderTestClass.class.getName()); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java index fd25d92cba62e..2db8eb0204b34 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java @@ -18,71 +18,116 @@ package org.apache.hudi.io.storage; -import org.apache.hudi.common.bloom.BloomFilter; -import org.apache.hudi.common.bloom.BloomFilterFactory; -import org.apache.hudi.common.bloom.BloomFilterTypeCode; -import org.apache.hudi.common.engine.TaskContextSupplier; -import org.apache.hudi.common.model.EmptyHoodieRecordPayload; -import org.apache.hudi.common.model.HoodieAvroRecord; -import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.model.HoodieRecord; - import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.CellComparatorImpl; import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.util.Pair; -import 
org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.io.TempDir; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; +import org.apache.hudi.common.engine.TaskContextSupplier; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.FileIOUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieIndexConfig; +import org.apache.hudi.config.HoodieWriteConfig; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import org.mockito.Mockito; -import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.TreeMap; import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; +import java.util.stream.StreamSupport; import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; -import static org.apache.hudi.io.storage.HoodieHFileConfig.CACHE_DATA_IN_L1; -import static org.apache.hudi.io.storage.HoodieHFileConfig.DROP_BEHIND_CACHE_COMPACTION; +import static org.apache.hudi.common.util.CollectionUtils.toStream; import static 
org.apache.hudi.io.storage.HoodieHFileConfig.HFILE_COMPARATOR; -import static org.apache.hudi.io.storage.HoodieHFileConfig.PREFETCH_ON_OPEN; +import static org.apache.hudi.io.storage.HoodieHFileReader.SCHEMA_KEY; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.Mockito.when; -public class TestHoodieHFileReaderWriter { - @TempDir File tempDir; - private Path filePath; +public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase { + private static final String DUMMY_BASE_PATH = "dummy_base_path"; + // Number of records in HFile fixtures for compatibility tests + private static final int NUM_RECORDS_FIXTURE = 50; + private static final String SIMPLE_SCHEMA_HFILE_SUFFIX = "_simple.hfile"; + private static final String COMPLEX_SCHEMA_HFILE_SUFFIX = "_complex.hfile"; + private static final String BOOTSTRAP_INDEX_HFILE_SUFFIX = "_bootstrap_index_partitions.hfile"; - @BeforeEach - public void setup() throws IOException { - filePath = new Path(tempDir.toString() + "tempFile.txt"); + @Override + protected Path getFilePath() { + return new Path(tempDir.toString() + "/f1_1-0-1_000.hfile"); } - @AfterEach - public void clearTempFile() { - File file = new File(filePath.toString()); - if (file.exists()) { - file.delete(); - } + @Override + protected HoodieFileWriter createWriter( + Schema avroSchema, boolean populateMetaFields) throws Exception { + String instantTime = "000"; + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath(DUMMY_BASE_PATH) + .withIndexConfig(HoodieIndexConfig.newBuilder() + .bloomFilterNumEntries(1000).bloomFilterFPP(0.00001).build()) + .withPopulateMetaFields(populateMetaFields) + .build(); + Configuration conf = new Configuration(); + TaskContextSupplier mockTaskContextSupplier = Mockito.mock(TaskContextSupplier.class); + Supplier partitionSupplier = 
Mockito.mock(Supplier.class); + when(mockTaskContextSupplier.getPartitionIdSupplier()).thenReturn(partitionSupplier); + when(partitionSupplier.get()).thenReturn(10); + + return HoodieFileWriterFactory.newHFileFileWriter( + instantTime, getFilePath(), writeConfig, avroSchema, conf, mockTaskContextSupplier); + } + + @Override + protected HoodieFileReader createReader( + Configuration conf) throws Exception { + CacheConfig cacheConfig = new CacheConfig(conf); + return new HoodieHFileReader<>(conf, getFilePath(), cacheConfig, getFilePath().getFileSystem(conf)); + } + + @Override + protected void verifyMetadata(Configuration conf) throws IOException { + FileSystem fs = getFilePath().getFileSystem(conf); + HFile.Reader hfileReader = HoodieHFileUtils.createHFileReader(fs, getFilePath(), new CacheConfig(conf), conf); + assertEquals(HFILE_COMPARATOR.getClass(), hfileReader.getComparator().getClass()); + assertEquals(NUM_RECORDS, hfileReader.getEntries()); + } + + @Override + protected void verifySchema(Configuration conf, String schemaPath) throws IOException { + FileSystem fs = getFilePath().getFileSystem(conf); + HFile.Reader hfileReader = HoodieHFileUtils.createHFileReader(fs, getFilePath(), new CacheConfig(conf), conf); + assertEquals(getSchemaFromResource(TestHoodieHFileReaderWriter.class, schemaPath), + new Schema.Parser().parse(new String(hfileReader.getHFileInfo().get(SCHEMA_KEY.getBytes())))); } private static Stream populateMetaFieldsAndTestAvroWithMeta() { @@ -94,27 +139,13 @@ private static Stream populateMetaFieldsAndTestAvroWithMeta() { }).map(Arguments::of); } - private HoodieHFileWriter createHFileWriter(Schema avroSchema, boolean populateMetaFields) throws Exception { - BloomFilter filter = BloomFilterFactory.createBloomFilter(1000, 0.00001, -1, BloomFilterTypeCode.SIMPLE.name()); - Configuration conf = new Configuration(); - TaskContextSupplier mockTaskContextSupplier = Mockito.mock(TaskContextSupplier.class); - Supplier partitionSupplier = 
Mockito.mock(Supplier.class); - when(mockTaskContextSupplier.getPartitionIdSupplier()).thenReturn(partitionSupplier); - when(partitionSupplier.get()).thenReturn(10); - String instantTime = "000"; - - HoodieHFileConfig hoodieHFileConfig = new HoodieHFileConfig(conf, Compression.Algorithm.GZ, 1024 * 1024, 120 * 1024 * 1024, - HoodieHFileReader.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR); - return new HoodieHFileWriter(instantTime, filePath, hoodieHFileConfig, avroSchema, mockTaskContextSupplier, populateMetaFields); - } - @ParameterizedTest @MethodSource("populateMetaFieldsAndTestAvroWithMeta") - public void testWriteReadHFile(boolean populateMetaFields, boolean testAvroWithMeta) throws Exception { + public void testWriteReadHFileWithMetaFields(boolean populateMetaFields, boolean testAvroWithMeta) throws Exception { Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchemaWithMetaFields.avsc"); - HoodieHFileWriter writer = createHFileWriter(avroSchema, populateMetaFields); + HoodieFileWriter writer = createWriter(avroSchema, populateMetaFields); List keys = new ArrayList<>(); - Map recordMap = new HashMap<>(); + Map recordMap = new TreeMap<>(); for (int i = 0; i < 100; i++) { GenericRecord record = new GenericData.Record(avroSchema); String key = String.format("%s%04d", "key", i); @@ -134,32 +165,174 @@ public void testWriteReadHFile(boolean populateMetaFields, boolean testAvroWithM writer.close(); Configuration conf = new Configuration(); - CacheConfig cacheConfig = new CacheConfig(conf); - HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(conf, filePath, cacheConfig, filePath.getFileSystem(conf)); - List> records = hoodieHFileReader.readAllRecords(); - records.forEach(entry -> assertEquals(entry.getSecond(), recordMap.get(entry.getFirst()))); + HoodieHFileReader hoodieHFileReader = (HoodieHFileReader) createReader(conf); + List records = 
HoodieHFileReader.readAllRecords(hoodieHFileReader); + assertEquals(new ArrayList<>(recordMap.values()), records); + hoodieHFileReader.close(); for (int i = 0; i < 2; i++) { int randomRowstoFetch = 5 + RANDOM.nextInt(10); Set rowsToFetch = getRandomKeys(randomRowstoFetch, keys); + List rowsList = new ArrayList<>(rowsToFetch); Collections.sort(rowsList); - hoodieHFileReader = new HoodieHFileReader(conf, filePath, cacheConfig, filePath.getFileSystem(conf)); - List> result = hoodieHFileReader.readRecords(rowsList); - assertEquals(result.size(), randomRowstoFetch); + + List expectedRecords = rowsList.stream().map(recordMap::get).collect(Collectors.toList()); + + hoodieHFileReader = (HoodieHFileReader) createReader(conf); + List result = HoodieHFileReader.readRecords(hoodieHFileReader, rowsList); + + assertEquals(expectedRecords, result); + result.forEach(entry -> { - assertEquals(entry.getSecond(), recordMap.get(entry.getFirst())); if (populateMetaFields && testAvroWithMeta) { - assertNotNull(entry.getSecond().get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNotNull(entry.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); } else { - assertNull(entry.getSecond().get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNull(entry.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); } }); hoodieHFileReader.close(); } } + @Override + @Test + public void testWriteReadWithEvolvedSchema() throws Exception { + // Disable the test with evolved schema for HFile since it's not supported + // TODO(HUDI-3683): fix the schema evolution for HFile + } + + @Test + public void testReadHFileFormatRecords() throws Exception { + writeFileWithSimpleSchema(); + FileSystem fs = FSUtils.getFs(getFilePath().toString(), new Configuration()); + byte[] content = FileIOUtils.readAsByteArray( + fs.open(getFilePath()), (int) fs.getFileStatus(getFilePath()).getLen()); + // Reading byte array in HFile format, without actual file path + HoodieHFileReader hfileReader = + new HoodieHFileReader<>(fs, new 
Path(DUMMY_BASE_PATH), content, Option.empty()); + Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + assertEquals(NUM_RECORDS, hfileReader.getTotalRecords()); + verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); + } + + @Test + public void testReaderGetRecordIterator() throws Exception { + writeFileWithSimpleSchema(); + HoodieHFileReader hfileReader = + (HoodieHFileReader) createReader(new Configuration()); + List keys = + IntStream.concat(IntStream.range(40, NUM_RECORDS * 2), IntStream.range(10, 20)) + .mapToObj(i -> "key" + String.format("%02d", i)).collect(Collectors.toList()); + Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + Iterator iterator = hfileReader.getRecordsByKeysIterator(keys, avroSchema); + + List expectedIds = + IntStream.concat(IntStream.range(40, NUM_RECORDS), IntStream.range(10, 20)) + .boxed().collect(Collectors.toList()); + int index = 0; + while (iterator.hasNext()) { + GenericRecord record = iterator.next(); + String key = "key" + String.format("%02d", expectedIds.get(index)); + assertEquals(key, record.get("_row_key").toString()); + assertEquals(Integer.toString(expectedIds.get(index)), record.get("time").toString()); + assertEquals(expectedIds.get(index), record.get("number")); + index++; + } + } + + @Test + public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception { + writeFileWithSimpleSchema(); + HoodieHFileReader hfileReader = + (HoodieHFileReader) createReader(new Configuration()); + + Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + + List keyPrefixes = Collections.singletonList("key"); + Iterator iterator = + hfileReader.getRecordsByKeyPrefixIterator(keyPrefixes, avroSchema); + + List recordsByPrefix = toStream(iterator).collect(Collectors.toList()); + + List allRecords = toStream(hfileReader.getRecordIterator()).collect(Collectors.toList()); + + 
assertEquals(allRecords, recordsByPrefix); + + // filter for "key1" : entries from key10 to key19 should be matched + List expectedKey1s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1")).collect(Collectors.toList()); + iterator = + hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key1"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(expectedKey1s, recordsByPrefix); + + // exact match + List expectedKey25 = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key25")).collect(Collectors.toList()); + iterator = + hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key25"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(expectedKey25, recordsByPrefix); + + // no match. key prefix is beyond entries in file. + iterator = + hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key99"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(Collections.emptyList(), recordsByPrefix); + + // no match. but keyPrefix is in between the entries found in file. 
+ iterator = + hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key1234"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(Collections.emptyList(), recordsByPrefix); + } + + @ParameterizedTest + @ValueSource(strings = { + "/hudi_0_9_hbase_1_2_3", "/hudi_0_10_hbase_1_2_3", "/hudi_0_11_hbase_2_4_9"}) + public void testHoodieHFileCompatibility(String hfilePrefix) throws IOException { + // This fixture is generated from TestHoodieReaderWriterBase#testWriteReadPrimitiveRecord() + // using different Hudi releases + String simpleHFile = hfilePrefix + SIMPLE_SCHEMA_HFILE_SUFFIX; + // This fixture is generated from TestHoodieReaderWriterBase#testWriteReadComplexRecord() + // using different Hudi releases + String complexHFile = hfilePrefix + COMPLEX_SCHEMA_HFILE_SUFFIX; + // This fixture is generated from TestBootstrapIndex#testBootstrapIndex() + // using different Hudi releases. 
The file is copied from .hoodie/.aux/.bootstrap/.partitions/ + String bootstrapIndexFile = hfilePrefix + BOOTSTRAP_INDEX_HFILE_SUFFIX; + + FileSystem fs = FSUtils.getFs(getFilePath().toString(), new Configuration()); + byte[] content = readHFileFromResources(simpleHFile); + verifyHFileReader( + HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content), + hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE); + HoodieHFileReader hfileReader = + new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content, Option.empty()); + Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords()); + verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); + + content = readHFileFromResources(complexHFile); + verifyHFileReader(HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content), + hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE); + hfileReader = new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content, Option.empty()); + avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchemaWithUDT.avsc"); + assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords()); + verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); + + content = readHFileFromResources(bootstrapIndexFile); + verifyHFileReader(HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content), + hfilePrefix, false, HFileBootstrapIndex.HoodieKVComparator.class, 4); + } + private Set getRandomKeys(int count, List keys) { Set rowKeys = new HashSet<>(); int totalKeys = keys.size(); @@ -171,4 +344,26 @@ private Set getRandomKeys(int count, List keys) { } return rowKeys; } + + private byte[] readHFileFromResources(String filename) throws IOException { + long size = TestHoodieHFileReaderWriter.class + .getResource(filename).openConnection().getContentLength(); + return 
FileIOUtils.readAsByteArray( + TestHoodieHFileReaderWriter.class.getResourceAsStream(filename), (int) size); + } + + private void verifyHFileReader( + HFile.Reader reader, String hfileName, boolean mayUseDefaultComparator, + Class clazz, int count) { + // HFile version is 3 + assertEquals(3, reader.getTrailer().getMajorVersion()); + if (mayUseDefaultComparator && hfileName.contains("hudi_0_9")) { + // Pre Hudi 0.10, the default comparator is used for metadata table HFiles + // For bootstrap index HFiles, the custom comparator is always used + assertEquals(CellComparatorImpl.class, reader.getComparator().getClass()); + } else { + assertEquals(clazz, reader.getComparator().getClass()); + } + assertEquals(count, reader.getEntries()); + } } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java index 68143a215c51c..282f102697d78 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java @@ -18,53 +18,40 @@ package org.apache.hudi.io.storage; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; import org.apache.hudi.common.bloom.BloomFilterTypeCode; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.config.HoodieStorageConfig; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.orc.CompressionKind; import org.apache.orc.OrcFile; import 
org.apache.orc.Reader; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; import org.mockito.Mockito; -import java.io.File; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.io.IOException; import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY; import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER; import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER; -import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; import static org.apache.hudi.io.storage.HoodieOrcConfig.AVRO_SCHEMA_METADATA_KEY; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; -public class TestHoodieOrcReaderWriter { - private final Path filePath = new Path(System.getProperty("java.io.tmpdir") + "/f1_1-0-1_000.orc"); +public class TestHoodieOrcReaderWriter extends TestHoodieReaderWriterBase { - @BeforeEach - @AfterEach - public void clearTempFile() { - File file = new File(filePath.toString()); - if (file.exists()) { - file.delete(); - } + @Override + protected Path getFilePath() { + return new Path(tempDir.toString() + "/f1_1-0-1_000.orc"); } - private HoodieOrcWriter createOrcWriter(Schema avroSchema) throws Exception { + @Override + protected HoodieFileWriter createWriter( + Schema avroSchema, boolean populateMetaFields) throws Exception { BloomFilter filter = BloomFilterFactory.createBloomFilter(1000, 0.00001, -1, BloomFilterTypeCode.SIMPLE.name()); Configuration conf = new Configuration(); int orcStripSize = Integer.parseInt(HoodieStorageConfig.ORC_STRIPE_SIZE.defaultValue()); @@ 
-73,189 +60,41 @@ private HoodieOrcWriter createOrcWriter(Schema avroSchema) throws Exception { HoodieOrcConfig config = new HoodieOrcConfig(conf, CompressionKind.ZLIB, orcStripSize, orcBlockSize, maxFileSize, filter); TaskContextSupplier mockTaskContextSupplier = Mockito.mock(TaskContextSupplier.class); String instantTime = "000"; - return new HoodieOrcWriter(instantTime, filePath, config, avroSchema, mockTaskContextSupplier); + return new HoodieOrcWriter<>(instantTime, getFilePath(), config, avroSchema, mockTaskContextSupplier); } - @Test - public void testWriteReadMetadata() throws Exception { - Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchema.avsc"); - HoodieOrcWriter writer = createOrcWriter(avroSchema); - for (int i = 0; i < 3; i++) { - GenericRecord record = new GenericData.Record(avroSchema); - record.put("_row_key", "key" + i); - record.put("time", Integer.toString(i)); - record.put("number", i); - writer.writeAvro("key" + i, record); - } - writer.close(); + @Override + protected HoodieFileReader createReader( + Configuration conf) throws Exception { + return HoodieFileReaderFactory.getFileReader(conf, getFilePath()); + } - Configuration conf = new Configuration(); - Reader orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf)); + @Override + protected void verifyMetadata(Configuration conf) throws IOException { + Reader orcReader = OrcFile.createReader(getFilePath(), OrcFile.readerOptions(conf)); assertEquals(4, orcReader.getMetadataKeys().size()); assertTrue(orcReader.getMetadataKeys().contains(HOODIE_MIN_RECORD_KEY_FOOTER)); assertTrue(orcReader.getMetadataKeys().contains(HOODIE_MAX_RECORD_KEY_FOOTER)); assertTrue(orcReader.getMetadataKeys().contains(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY)); assertTrue(orcReader.getMetadataKeys().contains(AVRO_SCHEMA_METADATA_KEY)); assertEquals(CompressionKind.ZLIB.name(), orcReader.getCompressionKind().toString()); - - HoodieFileReader hoodieReader = 
HoodieFileReaderFactory.getFileReader(conf, filePath); - BloomFilter filter = hoodieReader.readBloomFilter(); - for (int i = 0; i < 3; i++) { - assertTrue(filter.mightContain("key" + i)); - } - assertFalse(filter.mightContain("non-existent-key")); - assertEquals(3, hoodieReader.getTotalRecords()); - String[] minMaxRecordKeys = hoodieReader.readMinMaxRecordKeys(); - assertEquals(2, minMaxRecordKeys.length); - assertEquals("key0", minMaxRecordKeys[0]); - assertEquals("key2", minMaxRecordKeys[1]); - } - - @Test - public void testWriteReadPrimitiveRecord() throws Exception { - Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchema.avsc"); - HoodieOrcWriter writer = createOrcWriter(avroSchema); - for (int i = 0; i < 3; i++) { - GenericRecord record = new GenericData.Record(avroSchema); - record.put("_row_key", "key" + i); - record.put("time", Integer.toString(i)); - record.put("number", i); - writer.writeAvro("key" + i, record); - } - writer.close(); - - Configuration conf = new Configuration(); - Reader orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf)); - assertEquals("struct<_row_key:string,time:string,number:int>", orcReader.getSchema().toString()); - assertEquals(3, orcReader.getNumberOfRows()); - - HoodieFileReader hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath); - Iterator iter = hoodieReader.getRecordIterator(); - int index = 0; - while (iter.hasNext()) { - GenericRecord record = iter.next(); - assertEquals("key" + index, record.get("_row_key").toString()); - assertEquals(Integer.toString(index), record.get("time").toString()); - assertEquals(index, record.get("number")); - index++; - } + assertEquals(NUM_RECORDS, orcReader.getNumberOfRows()); } - @Test - public void testWriteReadComplexRecord() throws Exception { - Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchemaWithUDT.avsc"); - Schema udtSchema = 
avroSchema.getField("driver").schema().getTypes().get(1); - HoodieOrcWriter writer = createOrcWriter(avroSchema); - for (int i = 0; i < 3; i++) { - GenericRecord record = new GenericData.Record(avroSchema); - record.put("_row_key", "key" + i); - record.put("time", Integer.toString(i)); - record.put("number", i); - GenericRecord innerRecord = new GenericData.Record(udtSchema); - innerRecord.put("driver_name", "driver" + i); - innerRecord.put("list", Collections.singletonList(i)); - innerRecord.put("map", Collections.singletonMap("key" + i, "value" + i)); - record.put("driver", innerRecord); - writer.writeAvro("key" + i, record); - } - writer.close(); - - Configuration conf = new Configuration(); - Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf)); - assertEquals("struct<_row_key:string,time:string,number:int,driver:struct,map:map>>", - reader.getSchema().toString()); - assertEquals(3, reader.getNumberOfRows()); - - HoodieFileReader hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath); - Iterator iter = hoodieReader.getRecordIterator(); - int index = 0; - while (iter.hasNext()) { - GenericRecord record = iter.next(); - assertEquals("key" + index, record.get("_row_key").toString()); - assertEquals(Integer.toString(index), record.get("time").toString()); - assertEquals(index, record.get("number")); - GenericRecord innerRecord = (GenericRecord) record.get("driver"); - assertEquals("driver" + index, innerRecord.get("driver_name").toString()); - assertEquals(1, ((List)innerRecord.get("list")).size()); - assertEquals(index, ((List)innerRecord.get("list")).get(0)); - assertEquals("value" + index, ((Map)innerRecord.get("map")).get("key" + index).toString()); - index++; + @Override + protected void verifySchema(Configuration conf, String schemaPath) throws IOException { + Reader orcReader = OrcFile.createReader(getFilePath(), OrcFile.readerOptions(conf)); + if ("/exampleSchema.avsc".equals(schemaPath)) { + 
assertEquals("struct<_row_key:string,time:string,number:int>", + orcReader.getSchema().toString()); + } else if ("/exampleSchemaWithUDT.avsc".equals(schemaPath)) { + assertEquals("struct<_row_key:string,time:string,number:int,driver:struct,map:map>>", + orcReader.getSchema().toString()); } } - @Test - public void testWriteReadWithEvolvedSchema() throws Exception { - Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchema.avsc"); - HoodieOrcWriter writer = createOrcWriter(avroSchema); - for (int i = 0; i < 3; i++) { - GenericRecord record = new GenericData.Record(avroSchema); - record.put("_row_key", "key" + i); - record.put("time", Integer.toString(i)); - record.put("number", i); - writer.writeAvro("key" + i, record); - } - writer.close(); - - Configuration conf = new Configuration(); - HoodieFileReader hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath); - Schema evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchema.avsc"); - Iterator iter = hoodieReader.getRecordIterator(evolvedSchema); - int index = 0; - while (iter.hasNext()) { - GenericRecord record = iter.next(); - assertEquals("key" + index, record.get("_row_key").toString()); - assertEquals(Integer.toString(index), record.get("time").toString()); - assertEquals(index, record.get("number")); - assertNull(record.get("added_field")); - index++; - } - - evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaChangeOrder.avsc"); - iter = hoodieReader.getRecordIterator(evolvedSchema); - index = 0; - while (iter.hasNext()) { - GenericRecord record = iter.next(); - assertEquals("key" + index, record.get("_row_key").toString()); - assertEquals(Integer.toString(index), record.get("time").toString()); - assertEquals(index, record.get("number")); - assertNull(record.get("added_field")); - index++; - } - - evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, 
"/exampleEvolvedSchemaColumnRequire.avsc"); - iter = hoodieReader.getRecordIterator(evolvedSchema); - index = 0; - while (iter.hasNext()) { - GenericRecord record = iter.next(); - assertEquals("key" + index, record.get("_row_key").toString()); - assertEquals(Integer.toString(index), record.get("time").toString()); - assertEquals(index, record.get("number")); - assertNull(record.get("added_field")); - index++; - } - - evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaColumnType.avsc"); - iter = hoodieReader.getRecordIterator(evolvedSchema); - index = 0; - while (iter.hasNext()) { - GenericRecord record = iter.next(); - assertEquals("key" + index, record.get("_row_key").toString()); - assertEquals(Integer.toString(index), record.get("time").toString()); - assertEquals(Integer.toString(index), record.get("number").toString()); - assertNull(record.get("added_field")); - index++; - } - - evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaDeleteColumn.avsc"); - iter = hoodieReader.getRecordIterator(evolvedSchema); - index = 0; - while (iter.hasNext()) { - GenericRecord record = iter.next(); - assertEquals("key" + index, record.get("_row_key").toString()); - assertEquals(Integer.toString(index), record.get("time").toString()); - assertNull(record.get("number")); - assertNull(record.get("added_field")); - index++; - } + @Override + public void testReaderFilterRowKeys() { + // TODO(HUDI-3682): fix filterRowKeys test for ORC due to a bug in ORC logic } } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java new file mode 100644 index 0000000000000..4617eb93a66e7 --- /dev/null +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java @@ -0,0 +1,251 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.common.bloom.BloomFilter; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; +import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Abstract class for unit tests of {@link HoodieFileReader} and {@link HoodieFileWriter} + * for different file format + 
*/ +public abstract class TestHoodieReaderWriterBase { + protected static final int NUM_RECORDS = 50; + @TempDir + protected File tempDir; + + protected abstract Path getFilePath(); + + protected abstract HoodieFileWriter createWriter( + Schema avroSchema, boolean populateMetaFields) throws Exception; + + protected abstract HoodieFileReader createReader( + Configuration conf) throws Exception; + + protected abstract void verifyMetadata(Configuration conf) throws IOException; + + protected abstract void verifySchema(Configuration conf, String schemaPath) throws IOException; + + @BeforeEach + @AfterEach + public void clearTempFile() { + File file = new File(getFilePath().toString()); + if (file.exists()) { + file.delete(); + } + } + + @Test + public void testWriteReadMetadata() throws Exception { + Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + writeFileWithSimpleSchema(); + + Configuration conf = new Configuration(); + verifyMetadata(conf); + + HoodieFileReader hoodieReader = createReader(conf); + BloomFilter filter = hoodieReader.readBloomFilter(); + for (int i = 0; i < NUM_RECORDS; i++) { + String key = "key" + String.format("%02d", i); + assertTrue(filter.mightContain(key)); + } + assertFalse(filter.mightContain("non-existent-key")); + assertEquals(avroSchema, hoodieReader.getSchema()); + assertEquals(NUM_RECORDS, hoodieReader.getTotalRecords()); + String[] minMaxRecordKeys = hoodieReader.readMinMaxRecordKeys(); + assertEquals(2, minMaxRecordKeys.length); + assertEquals("key00", minMaxRecordKeys[0]); + assertEquals("key" + (NUM_RECORDS - 1), minMaxRecordKeys[1]); + } + + @Test + public void testWriteReadPrimitiveRecord() throws Exception { + String schemaPath = "/exampleSchema.avsc"; + writeFileWithSimpleSchema(); + + Configuration conf = new Configuration(); + verifyMetadata(conf); + verifySchema(conf, schemaPath); + verifySimpleRecords(createReader(conf).getRecordIterator()); + } + + @Test + public void 
testWriteReadComplexRecord() throws Exception { + String schemaPath = "/exampleSchemaWithUDT.avsc"; + Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, schemaPath); + Schema udtSchema = avroSchema.getField("driver").schema().getTypes().get(1); + HoodieFileWriter writer = createWriter(avroSchema, true); + for (int i = 0; i < NUM_RECORDS; i++) { + GenericRecord record = new GenericData.Record(avroSchema); + String key = "key" + String.format("%02d", i); + record.put("_row_key", key); + record.put("time", Integer.toString(i)); + record.put("number", i); + GenericRecord innerRecord = new GenericData.Record(udtSchema); + innerRecord.put("driver_name", "driver" + i); + innerRecord.put("list", Collections.singletonList(i)); + innerRecord.put("map", Collections.singletonMap(key, "value" + i)); + record.put("driver", innerRecord); + writer.writeAvro(key, record); + } + writer.close(); + + Configuration conf = new Configuration(); + verifyMetadata(conf); + verifySchema(conf, schemaPath); + verifyComplexRecords(createReader(conf).getRecordIterator()); + } + + @Test + public void testWriteReadWithEvolvedSchema() throws Exception { + writeFileWithSimpleSchema(); + + Configuration conf = new Configuration(); + HoodieFileReader hoodieReader = createReader(conf); + String[] schemaList = new String[] { + "/exampleEvolvedSchema.avsc", "/exampleEvolvedSchemaChangeOrder.avsc", + "/exampleEvolvedSchemaColumnRequire.avsc", "/exampleEvolvedSchemaColumnType.avsc", + "/exampleEvolvedSchemaDeleteColumn.avsc"}; + + for (String evolvedSchemaPath : schemaList) { + verifyReaderWithSchema(evolvedSchemaPath, hoodieReader); + } + } + + @Test + public void testReaderFilterRowKeys() throws Exception { + writeFileWithSimpleSchema(); + Configuration conf = new Configuration(); + verifyMetadata(conf); + verifyFilterRowKeys(createReader(conf)); + } + + protected void writeFileWithSimpleSchema() throws Exception { + Schema avroSchema = 
getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + HoodieFileWriter writer = createWriter(avroSchema, true); + for (int i = 0; i < NUM_RECORDS; i++) { + GenericRecord record = new GenericData.Record(avroSchema); + String key = "key" + String.format("%02d", i); + record.put("_row_key", key); + record.put("time", Integer.toString(i)); + record.put("number", i); + writer.writeAvro(key, record); + } + writer.close(); + } + + protected void verifySimpleRecords(Iterator iterator) { + int index = 0; + while (iterator.hasNext()) { + GenericRecord record = iterator.next(); + String key = "key" + String.format("%02d", index); + assertEquals(key, record.get("_row_key").toString()); + assertEquals(Integer.toString(index), record.get("time").toString()); + assertEquals(index, record.get("number")); + index++; + } + } + + protected void verifyComplexRecords(Iterator iterator) { + int index = 0; + while (iterator.hasNext()) { + GenericRecord record = iterator.next(); + String key = "key" + String.format("%02d", index); + assertEquals(key, record.get("_row_key").toString()); + assertEquals(Integer.toString(index), record.get("time").toString()); + assertEquals(index, record.get("number")); + GenericRecord innerRecord = (GenericRecord) record.get("driver"); + assertEquals("driver" + index, innerRecord.get("driver_name").toString()); + assertEquals(1, ((List) innerRecord.get("list")).size()); + assertEquals(index, ((List) innerRecord.get("list")).get(0)); + Map mapping = (Map) innerRecord.get("map"); + boolean match = false; + for (Object innerKey : mapping.keySet()) { + // The innerKey may not be in the type of String, so we have to + // use the following logic for validation + if (innerKey.toString().equals(key)) { + assertEquals("value" + index, mapping.get(innerKey).toString()); + match = true; + } + } + assertTrue(match); + index++; + } + } + + private void verifyFilterRowKeys(HoodieFileReader hoodieReader) { + Set candidateRowKeys = 
IntStream.range(40, NUM_RECORDS * 2) + .mapToObj(i -> "key" + String.format("%02d", i)).collect(Collectors.toCollection(TreeSet::new)); + List expectedKeys = IntStream.range(40, NUM_RECORDS) + .mapToObj(i -> "key" + String.format("%02d", i)).sorted().collect(Collectors.toList()); + assertEquals(expectedKeys, hoodieReader.filterRowKeys(candidateRowKeys) + .stream().sorted().collect(Collectors.toList())); + } + + private void verifyReaderWithSchema(String schemaPath, HoodieFileReader hoodieReader) throws IOException { + Schema evolvedSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, schemaPath); + Iterator iter = hoodieReader.getRecordIterator(evolvedSchema); + int index = 0; + while (iter.hasNext()) { + verifyRecord(schemaPath, iter.next(), index); + index++; + } + } + + private void verifyRecord(String schemaPath, GenericRecord record, int index) { + String numStr = String.format("%02d", index); + assertEquals("key" + numStr, record.get("_row_key").toString()); + assertEquals(Integer.toString(index), record.get("time").toString()); + if ("/exampleEvolvedSchemaColumnType.avsc".equals(schemaPath)) { + assertEquals(Integer.toString(index), record.get("number").toString()); + } else if ("/exampleEvolvedSchemaDeleteColumn.avsc".equals(schemaPath)) { + assertNull(record.get("number")); + } else { + assertEquals(index, record.get("number")); + } + assertNull(record.get("added_field")); + } +} diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/TestMetadataConversionUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/TestMetadataConversionUtils.java index 415c12a6407c6..9861506909980 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/TestMetadataConversionUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/TestMetadataConversionUtils.java @@ -259,7 +259,7 @@ private void createReplace(String instantTime, WriteOperationType writeOperation 
private void createCleanMetadata(String instantTime) throws IOException { HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new HoodieActionInstant("", "", ""), "", new HashMap<>(), - CleanPlanV2MigrationHandler.VERSION, new HashMap<>()); + CleanPlanV2MigrationHandler.VERSION, new HashMap<>(), new ArrayList<>()); HoodieCleanStat cleanStats = new HoodieCleanStat( HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, HoodieTestUtils.DEFAULT_PARTITION_PATHS[new Random().nextInt(HoodieTestUtils.DEFAULT_PARTITION_PATHS.length)], diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile b/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile new file mode 100644 index 0000000000000..91e9c7656c39e Binary files /dev/null and b/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile differ diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile b/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile new file mode 100644 index 0000000000000..8ce3d0d0b1f6c Binary files /dev/null and b/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile differ diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile b/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile new file mode 100644 index 0000000000000..abe0b336eb3c2 Binary files /dev/null and b/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile differ diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile b/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile new file mode 100644 index 0000000000000..7f6c5bd353d22 Binary files /dev/null and 
b/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile differ diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile b/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile new file mode 100644 index 0000000000000..f5293c5a249da Binary files /dev/null and b/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile differ diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile b/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile new file mode 100644 index 0000000000000..2b570920f1456 Binary files /dev/null and b/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile differ diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile b/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile new file mode 100644 index 0000000000000..290af9918e5dc Binary files /dev/null and b/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile differ diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile b/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile new file mode 100644 index 0000000000000..5a16f0ea68989 Binary files /dev/null and b/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile differ diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile b/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile new file mode 100644 index 0000000000000..e52d3c556e555 Binary files /dev/null and b/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile differ diff --git 
a/hudi-client/hudi-flink-client/pom.xml b/hudi-client/hudi-flink-client/pom.xml index ffe82a0c96b59..eb044312c42c8 100644 --- a/hudi-client/hudi-flink-client/pom.xml +++ b/hudi-client/hudi-flink-client/pom.xml @@ -19,16 +19,20 @@ hudi-client org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-flink-client - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-flink-client jar + + ${flink.format.parquet.version} + + @@ -87,6 +91,13 @@ org.apache.parquet parquet-avro + ${parquet.version} + + + + org.apache.parquet + parquet-column + ${parquet.version} diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java index 4523705f32b74..271ba95d941e8 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java @@ -329,12 +329,13 @@ protected List postWrite(HoodieWriteMetadata> res protected void postCommit(HoodieTable table, HoodieCommitMetadata metadata, String instantTime, - Option> extraMetadata) { + Option> extraMetadata, + boolean acquireLockForArchival) { try { // Delete the marker directory for the instant. 
WriteMarkersFactory.get(config.getMarkersType(), createTable(config, hadoopConf), instantTime) .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism()); - autoArchiveOnCommit(table); + autoArchiveOnCommit(table, acquireLockForArchival); } finally { this.heartbeatClient.stop(instantTime); } @@ -398,7 +399,7 @@ public HoodieWriteMetadata> cluster(final String clusteringIns } @Override - protected HoodieTable doInitTable(HoodieTableMetaClient metaClient, Option instantTime) { + protected HoodieTable doInitTable(HoodieTableMetaClient metaClient, Option instantTime, boolean initialMetadataTableIfNecessary) { // Create a Hoodie table which encapsulated the commits and files visible return getHoodieTable(); } diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataCreateHandle.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataCreateHandle.java index bbd9b882db7a8..486a5cc54b69a 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataCreateHandle.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataCreateHandle.java @@ -94,7 +94,8 @@ public HoodieRowDataCreateHandle(HoodieTable table, HoodieWriteConfig writeConfi fs, instantTime, new Path(writeConfig.getBasePath()), - FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath)); + FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath), + table.getPartitionMetafileFormat()); partitionMetadata.trySave(taskPartitionId); createMarkerFile(partitionPath, FSUtils.makeDataFileName(this.instantTime, getWriteToken(), this.fileId, table.getBaseFileExtension())); this.fileWriter = createNewFileWriter(path, table, writeConfig, rowType); diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/parquet/ParquetSchemaConverter.java 
b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/parquet/ParquetSchemaConverter.java index 5da45bf25d3f1..66a39b54a910b 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/parquet/ParquetSchemaConverter.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/parquet/ParquetSchemaConverter.java @@ -34,6 +34,7 @@ import org.apache.flink.table.types.logical.TimestampType; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; @@ -46,6 +47,8 @@ import java.util.ArrayList; import java.util.List; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit; + /** * Schema converter converts Parquet schema to and from Flink internal types. * @@ -436,7 +439,7 @@ private static Type convertField( String.format( "Can not convert Flink MapTypeInfo %s to Parquet" + " Map type as key has to be String", - typeInfo.toString())); + typeInfo)); } } else if (typeInfo instanceof ObjectArrayTypeInfo) { ObjectArrayTypeInfo objectArrayTypeInfo = (ObjectArrayTypeInfo) typeInfo; @@ -567,18 +570,16 @@ private static Type convertToParquetType( int numBytes = computeMinBytesForDecimalPrecision(precision); return Types.primitive( PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition) - .precision(precision) - .scale(scale) + .as(LogicalTypeAnnotation.decimalType(scale, precision)) .length(numBytes) - .as(OriginalType.DECIMAL) .named(name); case TINYINT: return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition) - .as(OriginalType.INT_8) + .as(LogicalTypeAnnotation.intType(8, true)) .named(name); case SMALLINT: return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition) - .as(OriginalType.INT_16) + .as(LogicalTypeAnnotation.intType(16, true)) .named(name); case 
INTEGER: return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition) @@ -594,16 +595,17 @@ private static Type convertToParquetType( .named(name); case DATE: return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition) - .as(OriginalType.DATE) + .as(LogicalTypeAnnotation.dateType()) .named(name); case TIME_WITHOUT_TIME_ZONE: return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition) - .as(OriginalType.TIME_MILLIS) + .as(LogicalTypeAnnotation.timeType(true, TimeUnit.MILLIS)) .named(name); case TIMESTAMP_WITHOUT_TIME_ZONE: TimestampType timestampType = (TimestampType) type; if (timestampType.getPrecision() == 3) { return Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition) + .as(LogicalTypeAnnotation.timestampType(true, TimeUnit.MILLIS)) .named(name); } else { return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition) @@ -613,6 +615,7 @@ private static Type convertToParquetType( LocalZonedTimestampType localZonedTimestampType = (LocalZonedTimestampType) type; if (localZonedTimestampType.getPrecision() == 3) { return Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition) + .as(LogicalTypeAnnotation.timestampType(false, TimeUnit.MILLIS)) .named(name); } else { return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition) diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java index aeb546b0ca5c6..76774e9618d79 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java @@ -18,7 +18,6 @@ package org.apache.hudi.metadata; -import org.apache.avro.specific.SpecificRecordBase; import 
org.apache.hudi.client.HoodieFlinkWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.data.HoodieData; @@ -32,7 +31,9 @@ import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieMetadataException; +import org.apache.hudi.exception.HoodieNotSupportedException; +import org.apache.avro.specific.SpecificRecordBase; import org.apache.hadoop.conf.Configuration; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -108,22 +109,35 @@ protected void commit(String instantTime, Map preppedRecordList = HoodieList.getList(preppedRecords); try (HoodieFlinkWriteClient writeClient = new HoodieFlinkWriteClient(engineContext, metadataWriteConfig)) { - if (!metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(instantTime)) { + if (canTriggerTableService) { + // trigger compaction before doing the delta commit. this is to ensure, if this delta commit succeeds in metadata table, but failed in data table, + // we would have compacted metadata table and so could have included uncommitted data which will never be ignored while reading from metadata + // table (since reader will filter out only from delta commits) + compactIfNecessary(writeClient, instantTime); + } + + if (!metadataMetaClient.getActiveTimeline().containsInstant(instantTime)) { // if this is a new commit being applied to metadata for the first time writeClient.startCommitWithTime(instantTime); metadataMetaClient.getActiveTimeline().transitionRequestedToInflight(HoodieActiveTimeline.DELTA_COMMIT_ACTION, instantTime); } else { - // this code path refers to a re-attempted commit that got committed to metadata table, but failed in datatable. - // for eg, lets say compaction c1 on 1st attempt succeeded in metadata table and failed before committing to datatable. - // when retried again, data table will first rollback pending compaction. 
these will be applied to metadata table, but all changes - // are upserts to metadata table and so only a new delta commit will be created. - // once rollback is complete, compaction will be retried again, which will eventually hit this code block where the respective commit is - // already part of completed commit. So, we have to manually remove the completed instant and proceed. - // and it is for the same reason we enabled withAllowMultiWriteOnSameInstant for metadata table. - HoodieInstant alreadyCompletedInstant = - metadataMetaClient.getActiveTimeline().filterCompletedInstants().filter(entry -> entry.getTimestamp().equals(instantTime)).lastInstant().get(); - HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant); - metadataMetaClient.reloadActiveTimeline(); + Option alreadyCompletedInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().filter(entry -> entry.getTimestamp().equals(instantTime)).lastInstant(); + if (alreadyCompletedInstant.isPresent()) { + // this code path refers to a re-attempted commit that got committed to metadata table, but failed in datatable. + // for eg, lets say compaction c1 on 1st attempt succeeded in metadata table and failed before committing to datatable. + // when retried again, data table will first rollback pending compaction. these will be applied to metadata table, but all changes + // are upserts to metadata table and so only a new delta commit will be created. + // once rollback is complete, compaction will be retried again, which will eventually hit this code block where the respective commit is + // already part of completed commit. So, we have to manually remove the completed instant and proceed. + // and it is for the same reason we enabled withAllowMultiWriteOnSameInstant for metadata table. 
+ HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant.get()); + metadataMetaClient.reloadActiveTimeline(); + } + // If the alreadyCompletedInstant is empty, that means there is a requested or inflight + // instant with the same instant time. This happens for data table clean action which + // reuses the same instant time without rollback first. It is a no-op here as the + // clean plan is the same, so we don't need to delete the requested and inflight instant + // files in the active timeline. } List statuses = preppedRecordList.size() > 0 @@ -140,7 +154,6 @@ protected void commit(String instantTime, Map m.updateSizeMetrics(metadataMetaClient, metadata)); } + + @Override + public void deletePartitions(String instantTime, List partitions) { + throw new HoodieNotSupportedException("Dropping metadata index not supported for Flink metadata table yet."); + } } \ No newline at end of file diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java index 14937d6fee244..9ab633f9e3b37 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java @@ -22,6 +22,8 @@ import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; +import org.apache.hudi.avro.model.HoodieIndexCommitMetadata; +import org.apache.hudi.avro.model.HoodieIndexPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; @@ -34,7 +36,6 @@ import org.apache.hudi.common.model.HoodieKey; import 
org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; @@ -49,6 +50,7 @@ import org.apache.hudi.io.HoodieWriteHandle; import org.apache.hudi.keygen.BaseKeyGenerator; import org.apache.hudi.keygen.factory.HoodieAvroKeyGeneratorFactory; +import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata; import org.apache.hudi.table.action.clean.CleanActionExecutor; @@ -63,12 +65,9 @@ import org.apache.hudi.table.action.commit.FlinkUpsertPreppedCommitActionExecutor; import org.apache.hudi.table.action.rollback.BaseRollbackPlanActionExecutor; import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.annotation.Nonnull; - import java.io.IOException; import java.util.Collections; import java.util.Iterator; @@ -246,11 +245,6 @@ public HoodieWriteMetadata deletePartitions(HoodieEngineContext context, String throw new HoodieNotSupportedException("DeletePartitions is not supported yet"); } - @Override - public void updateMetadataIndexes(@Nonnull HoodieEngineContext context, @Nonnull List stats, @Nonnull String instantTime) { - throw new HoodieNotSupportedException("update statistics is not supported yet"); - } - @Override public HoodieWriteMetadata> upsertPrepped(HoodieEngineContext context, String instantTime, List> preppedRecords) { throw new HoodieNotSupportedException("This method should not be invoked"); @@ -339,6 +333,16 @@ public HoodieRollbackMetadata rollback(HoodieEngineContext context, String rollb return new CopyOnWriteRollbackActionExecutor(context, config, this, 
rollbackInstantTime, commitInstant, deleteInstants, skipLocking).execute(); } + @Override + public Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex) { + throw new HoodieNotSupportedException("Metadata indexing is not supported for a Flink table yet."); + } + + @Override + public Option index(HoodieEngineContext context, String indexInstantTime) { + throw new HoodieNotSupportedException("Metadata indexing is not supported for a Flink table yet."); + } + @Override public HoodieSavepointMetadata savepoint(HoodieEngineContext context, String instantToSavepoint, String user, String comment) { throw new HoodieNotSupportedException("Savepoint is not supported yet"); diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java index 2f08a55c956fb..f1e43b9d30d42 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java @@ -18,7 +18,6 @@ package org.apache.hudi.table; -import org.apache.avro.specific.SpecificRecordBase; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.HoodieFlinkEngineContext; import org.apache.hudi.common.data.HoodieData; @@ -37,6 +36,8 @@ import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.table.action.HoodieWriteMetadata; +import org.apache.avro.specific.SpecificRecordBase; + import java.util.List; import static org.apache.hudi.common.data.HoodieList.getList; @@ -104,9 +105,13 @@ protected HoodieIndex getIndex(HoodieWriteConfig config, HoodieEngineContext con public Option getMetadataWriter(String triggeringInstantTimestamp, Option actionMetadata) { if (config.isMetadataTableEnabled()) { + // even with metadata enabled, some index could have been disabled + // delete metadata 
partitions corresponding to such indexes + deleteMetadataIndexIfNecessary(); return Option.of(FlinkHoodieBackedTableMetadataWriter.create(context.getHadoopConf().get(), config, context, actionMetadata, Option.of(triggeringInstantTimestamp))); } else { + maybeDeleteMetadataTable(); return Option.empty(); } } diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java index 66723a3fcdb7a..9c17e77b91831 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java @@ -102,7 +102,7 @@ public List> deduplicateRecords( // we cannot allow the user to change the key or partitionPath, since that will affect // everything // so pick it from one of the records. - boolean choosePrev = data1.equals(reducedData); + boolean choosePrev = data1 == reducedData; HoodieKey reducedKey = choosePrev ? rec1.getKey() : rec2.getKey(); HoodieOperation operation = choosePrev ? 
rec1.getOperation() : rec2.getOperation(); HoodieRecord hoodieRecord = new HoodieAvroRecord<>(reducedKey, reducedData, operation); diff --git a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/io/storage/row/parquet/TestParquetSchemaConverter.java b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/io/storage/row/parquet/TestParquetSchemaConverter.java index 5305bcc8aba74..a1a07a65f9931 100644 --- a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/io/storage/row/parquet/TestParquetSchemaConverter.java +++ b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/io/storage/row/parquet/TestParquetSchemaConverter.java @@ -51,24 +51,41 @@ void testConvertComplexTypes() { final String expected = "message converted {\n" + " optional group f_array (LIST) {\n" + " repeated group list {\n" - + " optional binary element (UTF8);\n" + + " optional binary element (STRING);\n" + " }\n" + " }\n" + " optional group f_map (MAP) {\n" + " repeated group key_value {\n" + " optional int32 key;\n" - + " optional binary value (UTF8);\n" + + " optional binary value (STRING);\n" + " }\n" + " }\n" + " optional group f_row {\n" + " optional int32 f_row_f0;\n" - + " optional binary f_row_f1 (UTF8);\n" + + " optional binary f_row_f1 (STRING);\n" + " optional group f_row_f2 {\n" + " optional int32 f_row_f2_f0;\n" - + " optional binary f_row_f2_f1 (UTF8);\n" + + " optional binary f_row_f2_f1 (STRING);\n" + " }\n" + " }\n" + "}\n"; assertThat(messageType.toString(), is(expected)); } + + @Test + void testConvertTimestampTypes() { + DataType dataType = DataTypes.ROW( + DataTypes.FIELD("ts_3", DataTypes.TIMESTAMP(3)), + DataTypes.FIELD("ts_6", DataTypes.TIMESTAMP(6)), + DataTypes.FIELD("ts_9", DataTypes.TIMESTAMP(9))); + org.apache.parquet.schema.MessageType messageType = + ParquetSchemaConverter.convertToParquetMessageType("converted", (RowType) dataType.getLogicalType()); + assertThat(messageType.getColumns().size(), is(3)); + final String expected = "message 
converted {\n" + + " optional int64 ts_3 (TIMESTAMP(MILLIS,true));\n" + + " optional int96 ts_6;\n" + + " optional int96 ts_9;\n" + + "}\n"; + assertThat(messageType.toString(), is(expected)); + } } diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml index 3471bfb8ba366..068bf48282040 100644 --- a/hudi-client/hudi-java-client/pom.xml +++ b/hudi-client/hudi-java-client/pom.xml @@ -19,12 +19,12 @@ hudi-client org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-java-client - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-java-client jar diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java index faf46e08d5515..7f5dc19baf274 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java @@ -200,7 +200,7 @@ protected List postWrite(HoodieWriteMetadata> res result.getWriteStats().get().size()); } - postCommit(hoodieTable, result.getCommitMetadata().get(), instantTime, Option.empty()); + postCommit(hoodieTable, result.getCommitMetadata().get(), instantTime, Option.empty(), true); emitCommitMetrics(instantTime, result.getCommitMetadata().get(), hoodieTable.getMetaClient().getCommitActionType()); } @@ -233,7 +233,7 @@ public HoodieWriteMetadata> cluster(final String clusteringIns } @Override - protected HoodieTable doInitTable(HoodieTableMetaClient metaClient, Option instantTime) { + protected HoodieTable doInitTable(HoodieTableMetaClient metaClient, Option instantTime, boolean initialMetadataTableIfNecessary) { // new JavaUpgradeDowngrade(metaClient, config, context).run(metaClient, HoodieTableVersion.current(), config, context, instantTime); // Create a Hoodie table which encapsulated the commits and files visible diff --git 
a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaSortAndSizeExecutionStrategy.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaSortAndSizeExecutionStrategy.java index 7751833fc04b0..d34673c2d9b9a 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaSortAndSizeExecutionStrategy.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaSortAndSizeExecutionStrategy.java @@ -37,7 +37,6 @@ import java.util.List; import java.util.Map; -import java.util.Properties; /** * Clustering Strategy based on following. @@ -60,13 +59,12 @@ public List performClusteringWithRecordList( final String instantTime, final Map strategyParams, final Schema schema, final List fileGroupIdList, final boolean preserveHoodieMetadata) { LOG.info("Starting clustering for a group, parallelism:" + numOutputGroups + " commit:" + instantTime); - Properties props = getWriteConfig().getProps(); - props.put(HoodieWriteConfig.BULKINSERT_PARALLELISM_VALUE.key(), String.valueOf(numOutputGroups)); - // We are calling another action executor - disable auto commit. Strategy is only expected to write data in new files. 
- props.put(HoodieWriteConfig.AUTO_COMMIT_ENABLE.key(), Boolean.FALSE.toString()); - props.put(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.key(), String.valueOf(getWriteConfig().getClusteringTargetFileMaxBytes())); + HoodieWriteConfig newConfig = HoodieWriteConfig.newBuilder() - .withEngineType(EngineType.JAVA).withProps(props).build(); + .withBulkInsertParallelism(numOutputGroups) + .withEngineType(EngineType.JAVA) + .withProps(getWriteConfig().getProps()).build(); + newConfig.setValue(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE, String.valueOf(getWriteConfig().getClusteringTargetFileMaxBytes())); return (List) JavaBulkInsertHelper.newInstance().bulkInsert(inputRecords, instantTime, getHoodieTable(), newConfig, false, getPartitioner(strategyParams, schema), true, numOutputGroups, new CreateHandleFactory(preserveHoodieMetadata)); } diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java index 06c23049d974d..88921334980ed 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java @@ -22,6 +22,8 @@ import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; +import org.apache.hudi.avro.model.HoodieIndexCommitMetadata; +import org.apache.hudi.avro.model.HoodieIndexPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; @@ -34,7 +36,6 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.common.model.HoodieWriteStat; import 
org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; @@ -45,6 +46,7 @@ import org.apache.hudi.io.HoodieCreateHandle; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieSortedMergeHandle; +import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata; import org.apache.hudi.table.action.clean.CleanActionExecutor; @@ -61,17 +63,16 @@ import org.apache.hudi.table.action.commit.JavaMergeHelper; import org.apache.hudi.table.action.commit.JavaUpsertCommitActionExecutor; import org.apache.hudi.table.action.commit.JavaUpsertPreppedCommitActionExecutor; +import org.apache.hudi.table.action.index.RunIndexActionExecutor; +import org.apache.hudi.table.action.index.ScheduleIndexActionExecutor; import org.apache.hudi.table.action.restore.CopyOnWriteRestoreActionExecutor; import org.apache.hudi.table.action.rollback.BaseRollbackPlanActionExecutor; import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor; import org.apache.hudi.table.action.rollback.RestorePlanActionExecutor; import org.apache.hudi.table.action.savepoint.SavepointActionExecutor; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.annotation.Nonnull; - import java.io.IOException; import java.util.Collections; import java.util.Iterator; @@ -173,11 +174,6 @@ public HoodieWriteMetadata> insertOverwriteTable(HoodieEngineC context, config, this, instantTime, records).execute(); } - @Override - public void updateMetadataIndexes(@Nonnull HoodieEngineContext context, @Nonnull List stats, @Nonnull String instantTime) { - throw new HoodieNotSupportedException("update statistics is not supported yet"); - } - @Override public Option scheduleCompaction(HoodieEngineContext context, String instantTime, @@ -241,6 +237,16 
@@ public HoodieRollbackMetadata rollback(HoodieEngineContext context, context, config, this, rollbackInstantTime, commitInstant, deleteInstants, skipLocking).execute(); } + @Override + public Option scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex) { + return new ScheduleIndexActionExecutor<>(context, config, this, indexInstantTime, partitionsToIndex).execute(); + } + + @Override + public Option index(HoodieEngineContext context, String indexInstantTime) { + return new RunIndexActionExecutor<>(context, config, this, indexInstantTime).execute(); + } + @Override public HoodieSavepointMetadata savepoint(HoodieEngineContext context, String instantToSavepoint, diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java index 30f1d931a5462..39b2916732f2a 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java @@ -19,6 +19,7 @@ package org.apache.hudi.table.action.commit; import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -111,7 +112,7 @@ public List bulkInsert(List> inputRecords, FileIdPrefixProvider fileIdPrefixProvider = (FileIdPrefixProvider) ReflectionUtils.loadClass( config.getFileIdPrefixProviderClassName(), - config.getProps()); + new TypedProperties(config.getProps())); List writeStatuses = new ArrayList<>(); diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java 
b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java index 8f296d5106174..1bf1b4cccbf51 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java @@ -402,7 +402,7 @@ public void testFileSizeUpsertRecords() throws Exception { counts++; } } - assertEquals(3, counts, "If the number of records are more than 1150, then there should be a new file"); + assertEquals(5, counts, "If the number of records are more than 1150, then there should be a new file"); } @Test diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index d6c60cb61bc45..1b2cd30fe0676 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -19,12 +19,12 @@ hudi-client org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-spark-client - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-spark-client jar @@ -53,11 +53,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - org.apache.spark - spark-avro_${scala.binary.version} - provided - @@ -110,6 +105,12 @@ + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + test + diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncClusteringService.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncClusteringService.java index 8f6535b11d9b3..dd2ac9193998f 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncClusteringService.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncClusteringService.java @@ -22,14 +22,15 @@ import org.apache.hudi.client.BaseClusterer; import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.HoodieSparkClusteringClient; +import 
org.apache.hudi.common.engine.HoodieEngineContext; /** * Async clustering service for Spark datasource. */ public class SparkAsyncClusteringService extends AsyncClusteringService { - public SparkAsyncClusteringService(BaseHoodieWriteClient writeClient) { - super(writeClient); + public SparkAsyncClusteringService(HoodieEngineContext engineContext, BaseHoodieWriteClient writeClient) { + super(engineContext, writeClient); } @Override diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java index ac9259c51ad70..7b0c8bbc8d25c 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java @@ -285,7 +285,7 @@ protected JavaRDD postWrite(HoodieWriteMetadata instantTime) { - // Initialize Metadata Table to make sure it's bootstrapped _before_ the operation, - // if it didn't exist before - // See https://issues.apache.org/jira/browse/HUDI-3343 for more details - initializeMetadataTable(instantTime); + protected HoodieTable doInitTable(HoodieTableMetaClient metaClient, Option instantTime, boolean initialMetadataTableIfNecessary) { + if (initialMetadataTableIfNecessary) { + // Initialize Metadata Table to make sure it's bootstrapped _before_ the operation, + // if it didn't exist before + // See https://issues.apache.org/jira/browse/HUDI-3343 for more details + initializeMetadataTable(instantTime); + } // Create a Hoodie table which encapsulated the commits and files visible return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, config.isMetadataTableEnabled()); @@ -474,7 +473,7 @@ protected void preCommit(HoodieInstant inflightInstant, HoodieCommitMetadata met // Important to create this after the lock to ensure the latest commits show up in the timeline without need 
for reload HoodieTable table = createTable(config, hadoopConf); TransactionUtils.resolveWriteConflictIfAny(table, this.txnManager.getCurrentTransactionOwner(), - Option.of(metadata), config, txnManager.getLastCompletedTransactionOwner()); + Option.of(metadata), config, txnManager.getLastCompletedTransactionOwner(), false, this.pendingInflightAndRequestedInstants); } @Override diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSingleFileSortExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSingleFileSortExecutionStrategy.java index 4dedabaec850e..4a7ee7bceeacd 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSingleFileSortExecutionStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSingleFileSortExecutionStrategy.java @@ -38,7 +38,6 @@ import java.util.List; import java.util.Map; -import java.util.Properties; /** * This strategy is similar to {@link SparkSortAndSizeExecutionStrategy} with the difference being that @@ -67,13 +66,12 @@ public HoodieData performClusteringWithRecordsRDD(HoodieData) SparkBulkInsertHelper.newInstance().bulkInsert(inputRecords, instantTime, getHoodieTable(), newConfig, false, getPartitioner(strategyParams, schema), true, numOutputGroups, new SingleFileHandleCreateFactory(fileGroupIdList.get(0).getFileId(), preserveHoodieMetadata)); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSortAndSizeExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSortAndSizeExecutionStrategy.java index d664c835e494b..7db63d4169fc1 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSortAndSizeExecutionStrategy.java +++ 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSortAndSizeExecutionStrategy.java @@ -36,7 +36,6 @@ import java.util.List; import java.util.Map; -import java.util.Properties; /** * Clustering Strategy based on following. @@ -58,12 +57,11 @@ public HoodieData performClusteringWithRecordsRDD(final HoodieData< final String instantTime, final Map strategyParams, final Schema schema, final List fileGroupIdList, final boolean preserveHoodieMetadata) { LOG.info("Starting clustering for a group, parallelism:" + numOutputGroups + " commit:" + instantTime); - Properties props = getWriteConfig().getProps(); - props.put(HoodieWriteConfig.BULKINSERT_PARALLELISM_VALUE.key(), String.valueOf(numOutputGroups)); - // We are calling another action executor - disable auto commit. Strategy is only expected to write data in new files. - props.put(HoodieWriteConfig.AUTO_COMMIT_ENABLE.key(), Boolean.FALSE.toString()); - props.put(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.key(), String.valueOf(getWriteConfig().getClusteringTargetFileMaxBytes())); - HoodieWriteConfig newConfig = HoodieWriteConfig.newBuilder().withProps(props).build(); + + HoodieWriteConfig newConfig = HoodieWriteConfig.newBuilder() + .withBulkInsertParallelism(numOutputGroups) + .withProps(getWriteConfig().getProps()).build(); + newConfig.setValue(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE, String.valueOf(getWriteConfig().getClusteringTargetFileMaxBytes())); return (HoodieData) SparkBulkInsertHelper.newInstance() .bulkInsert(inputRecords, instantTime, getHoodieTable(), newConfig, false, getPartitioner(strategyParams, schema), true, numOutputGroups, new CreateHandleFactory(preserveHoodieMetadata)); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java index cc29ef70f5dd4..d8281d1a10b73 100644 --- 
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java @@ -36,6 +36,8 @@ import org.apache.hudi.data.HoodieSparkLongAccumulator; import org.apache.hudi.exception.HoodieException; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.PairFlatMapFunction; import org.apache.spark.sql.SQLContext; @@ -53,6 +55,7 @@ */ public class HoodieSparkEngineContext extends HoodieEngineContext { + private static final Logger LOG = LogManager.getLogger(HoodieSparkEngineContext.class); private final JavaSparkContext javaSparkContext; private SQLContext sqlContext; @@ -158,6 +161,8 @@ public Map mapToPair(List data, SerializablePairFunction newTypes = new ArrayList<>(); + for (StructField f : fields) { + newTypes.add(buildTypeFromStructType(f.dataType(), false, nextId)); + } + List newFields = new ArrayList<>(); + for (int i = 0; i < newTypes.size(); i++) { + StructField f = fields[i]; + newFields.add(Types.Field.get(nextAssignId + i, f.nullable(), f.name(), newTypes.get(i), + f.getComment().isDefined() ? 
f.getComment().get() : null)); + } + return Types.RecordType.get(newFields); + } else if (sparkType instanceof MapType) { + MapType map = (MapType) sparkType; + DataType keyType = map.keyType(); + DataType valueType = map.valueType(); + int keyId = nextId.get(); + int valueId = keyId + 1; + nextId.set(valueId + 1); + return Types.MapType.get(keyId, valueId, buildTypeFromStructType(keyType, false, nextId), + buildTypeFromStructType(valueType, false, nextId), map.valueContainsNull()); + } else if (sparkType instanceof ArrayType) { + ArrayType array = (ArrayType) sparkType; + DataType et = array.elementType(); + int elementId = nextId.get(); + nextId.set(elementId + 1); + return Types.ArrayType.get(elementId, array.containsNull(), buildTypeFromStructType(et, false, nextId)); + } else if (sparkType instanceof UserDefinedType) { + throw new UnsupportedOperationException("User-defined types are not supported"); + } else if (sparkType instanceof BooleanType) { + return Types.BooleanType.get(); + } else if (sparkType instanceof IntegerType + || sparkType instanceof ShortType + || sparkType instanceof ByteType) { + return Types.IntType.get(); + } else if (sparkType instanceof LongType) { + return Types.LongType.get(); + } else if (sparkType instanceof FloatType) { + return Types.FloatType.get(); + } else if (sparkType instanceof DoubleType) { + return Types.DoubleType.get(); + } else if (sparkType instanceof StringType + || sparkType instanceof CharType + || sparkType instanceof VarcharType) { + return Types.StringType.get(); + } else if (sparkType instanceof DateType) { + return Types.DateType.get(); + // spark 3.3.0 support TimeStampNTZ, to do support spark3.3.0 + } else if (sparkType instanceof TimestampType) { + return Types.TimestampType.get(); + } else if (sparkType instanceof DecimalType) { + return Types.DecimalType.get( + ((DecimalType) sparkType).precision(), + ((DecimalType) sparkType).scale()); + } else if (sparkType instanceof BinaryType) { + return 
Types.BinaryType.get(); + } else { + throw new UnsupportedOperationException(String.format("Not a supported type: %s", sparkType.catalogString())); + } + } + + /** + * Converts Spark schema to Hudi internal schema, and prune fields. + * Fields without IDs are kept and assigned fallback IDs. + * + * @param sparkSchema a pruned spark schema + * @param originSchema a internal schema for hoodie table + * @return a pruned internal schema for the provided spark schema + */ + public static InternalSchema convertAndPruneStructTypeToInternalSchema(StructType sparkSchema, InternalSchema originSchema) { + List pruneNames = collectColNamesFromSparkStruct(sparkSchema); + return InternalSchemaUtils.pruneInternalSchema(originSchema, pruneNames); + } + + /** + * Collect all the leaf nodes names. + * + * @param sparkSchema a spark schema + * @return leaf nodes full names. + */ + public static List collectColNamesFromSparkStruct(StructType sparkSchema) { + List result = new ArrayList<>(); + collectColNamesFromStructType(sparkSchema, new LinkedList<>(), result); + return result; + } + + private static void collectColNamesFromStructType(DataType sparkType, Deque fieldNames, List resultSet) { + if (sparkType instanceof StructType) { + StructField[] fields = ((StructType) sparkType).fields(); + for (StructField f : fields) { + fieldNames.push(f.name()); + collectColNamesFromStructType(f.dataType(), fieldNames, resultSet); + fieldNames.pop(); + addFullName(f.dataType(), f.name(), fieldNames, resultSet); + } + } else if (sparkType instanceof MapType) { + MapType map = (MapType) sparkType; + DataType keyType = map.keyType(); + DataType valueType = map.valueType(); + // key + fieldNames.push("key"); + collectColNamesFromStructType(keyType, fieldNames, resultSet); + fieldNames.pop(); + addFullName(keyType,"key", fieldNames, resultSet); + // value + fieldNames.push("value"); + collectColNamesFromStructType(valueType, fieldNames, resultSet); + fieldNames.poll(); + 
addFullName(valueType,"value", fieldNames, resultSet); + } else if (sparkType instanceof ArrayType) { + ArrayType array = (ArrayType) sparkType; + DataType et = array.elementType(); + fieldNames.push("element"); + collectColNamesFromStructType(et, fieldNames, resultSet); + fieldNames.pop(); + addFullName(et, "element", fieldNames, resultSet); + } else if (sparkType instanceof UserDefinedType) { + throw new UnsupportedOperationException("User-defined types are not supported"); + } else { + // do nothings + } + } + + private static void addFullName(DataType sparkType, String name, Deque fieldNames, List resultSet) { + if (!(sparkType instanceof StructType) && !(sparkType instanceof ArrayType) && !(sparkType instanceof MapType)) { + resultSet.add(InternalSchemaUtils.createFullName(name, fieldNames)); + } + } + + public static StructType mergeSchema(InternalSchema fileSchema, InternalSchema querySchema) { + InternalSchema schema = new InternalSchemaMerger(fileSchema, querySchema, true, true).mergeSchema(); + return constructSparkSchemaFromInternalSchema(schema); + } + + public static Map> collectTypeChangedCols(InternalSchema schema, InternalSchema other) { + return InternalSchemaUtils + .collectTypeChangedCols(schema, other) + .entrySet() + .stream() + .collect(Collectors.toMap(e -> e.getKey(), e -> Pair.of(constructSparkSchemaFromType(e.getValue().getLeft()), constructSparkSchemaFromType(e.getValue().getRight())))); + } + + public static StructType constructSparkSchemaFromInternalSchema(InternalSchema schema) { + return (StructType) constructSparkSchemaFromType(schema.getRecord()); + } + + private static DataType constructSparkSchemaFromType(Type type) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType) type; + List fields = record.fields(); + List structFields = new ArrayList<>(); + for (Types.Field f : fields) { + DataType dataType = constructSparkSchemaFromType(f.type()); + StructField structField = 
StructField.apply(f.name(), dataType, f.isOptional(), Metadata.empty()); + structField = f.doc() == null ? structField : structField.withComment(f.doc()); + structFields.add(structField); + } + return StructType$.MODULE$.apply(structFields); + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + DataType elementType = constructSparkSchemaFromType(array.elementType()); + return ArrayType$.MODULE$.apply(elementType, array.isElementOptional()); + case MAP: + Types.MapType map = (Types.MapType) type; + DataType keyDataType = constructSparkSchemaFromType(map.keyType()); + DataType valueDataType = constructSparkSchemaFromType(map.valueType()); + return MapType$.MODULE$.apply(keyDataType, valueDataType, map.isValueOptional()); + case BOOLEAN: + return BooleanType$.MODULE$; + case INT: + return IntegerType$.MODULE$; + case LONG: + return LongType$.MODULE$; + case FLOAT: + return FloatType$.MODULE$; + case DOUBLE: + return DoubleType$.MODULE$; + case DATE: + return DateType$.MODULE$; + case TIME: + throw new UnsupportedOperationException(String.format("cannot convert %s type to Spark", type)); + case TIMESTAMP: + // todo support TimeStampNTZ + return TimestampType$.MODULE$; + case STRING: + return StringType$.MODULE$; + case UUID: + return StringType$.MODULE$; + case FIXED: + return BinaryType$.MODULE$; + case BINARY: + return BinaryType$.MODULE$; + case DECIMAL: + Types.DecimalType decimal = (Types.DecimalType) type; + return DecimalType$.MODULE$.apply(decimal.precision(), decimal.scale()); + default: + throw new UnsupportedOperationException(String.format("cannot convert unknown type: %s to Spark", type)); + } + } + + /** + * Convert Int/long type to other Type. 
+ * Now only support int/long -> long/float/double/string + * TODO: support more types + */ + private static boolean convertIntLongType(WritableColumnVector oldV, WritableColumnVector newV, DataType newType, int len) { + boolean isInt = oldV.dataType() instanceof IntegerType; + if (newType instanceof LongType || newType instanceof FloatType + || newType instanceof DoubleType || newType instanceof StringType || newType instanceof DecimalType) { + for (int i = 0; i < len; i++) { + if (oldV.isNullAt(i)) { + newV.putNull(i); + continue; + } + // int/long -> long/float/double/string/decimal + if (newType instanceof LongType) { + newV.putLong(i, isInt ? oldV.getInt(i) : oldV.getLong(i)); + } else if (newType instanceof FloatType) { + newV.putFloat(i, isInt ? oldV.getInt(i) : oldV.getLong(i)); + } else if (newType instanceof DoubleType) { + newV.putDouble(i, isInt ? oldV.getInt(i) : oldV.getLong(i)); + } else if (newType instanceof StringType) { + newV.putByteArray(i, ((isInt ? oldV.getInt(i) : oldV.getLong(i)) + "").getBytes(StandardCharsets.UTF_8)); + } else if (newType instanceof DecimalType) { + Decimal oldDecimal = Decimal.apply(isInt ? oldV.getInt(i) : oldV.getLong(i)); + oldDecimal.changePrecision(((DecimalType) newType).precision(), ((DecimalType) newType).scale()); + newV.putDecimal(i, oldDecimal, ((DecimalType) newType).precision()); + } + } + return true; + } + return false; + } + + /** + * Convert float type to other Type. 
+ * Now only support float -> double/String + * TODO: support more types + */ + private static boolean convertFloatType(WritableColumnVector oldV, WritableColumnVector newV, DataType newType, int len) { + if (newType instanceof DoubleType || newType instanceof StringType || newType instanceof DecimalType) { + for (int i = 0; i < len; i++) { + if (oldV.isNullAt(i)) { + newV.putNull(i); + continue; + } + // float -> double/string/decimal + if (newType instanceof DoubleType) { + newV.putDouble(i, Double.valueOf(oldV.getFloat(i) + "")); + } else if (newType instanceof StringType) { + newV.putByteArray(i, (oldV.getFloat(i) + "").getBytes(StandardCharsets.UTF_8)); + } else if (newType instanceof DecimalType) { + Decimal oldDecimal = Decimal.apply(oldV.getFloat(i)); + oldDecimal.changePrecision(((DecimalType) newType).precision(), ((DecimalType) newType).scale()); + newV.putDecimal(i, oldDecimal, ((DecimalType) newType).precision()); + } + } + return true; + } + return false; + } + + /** + * Convert double type to other Type. + * Now only support Double -> Decimal/String + * TODO: support more types + */ + private static boolean convertDoubleType(WritableColumnVector oldV, WritableColumnVector newV, DataType newType, int len) { + if (newType instanceof DecimalType || newType instanceof StringType) { + for (int i = 0; i < len; i++) { + if (oldV.isNullAt(i)) { + newV.putNull(i); + continue; + } + // double -> decimal/string + if (newType instanceof DecimalType) { + Decimal oldDecimal = Decimal.apply(oldV.getDouble(i)); + oldDecimal.changePrecision(((DecimalType) newType).precision(), ((DecimalType) newType).scale()); + newV.putDecimal(i, oldDecimal, ((DecimalType) newType).precision()); + } else if (newType instanceof StringType) { + newV.putByteArray(i, (oldV.getDouble(i) + "").getBytes(StandardCharsets.UTF_8)); + } + } + return true; + } + return false; + } + + /** + * Convert decimal type to other Type. 
+ * Now only support Decimal -> Decimal/String + * TODO: support more types + */ + private static boolean convertDecimalType(WritableColumnVector oldV, WritableColumnVector newV, DataType newType, int len) { + DataType oldType = oldV.dataType(); + if (newType instanceof DecimalType || newType instanceof StringType) { + for (int i = 0; i < len; i++) { + if (oldV.isNullAt(i)) { + newV.putNull(i); + continue; + } + Decimal oldDecimal = oldV.getDecimal(i, ((DecimalType) oldType).precision(), ((DecimalType) oldType).scale()); + if (newType instanceof DecimalType) { + oldDecimal.changePrecision(((DecimalType) newType).precision(), ((DecimalType) newType).scale()); + newV.putDecimal(i, oldDecimal, ((DecimalType) newType).precision()); + } else if (newType instanceof StringType) { + newV.putByteArray(i, oldDecimal.toString().getBytes(StandardCharsets.UTF_8)); + } + } + return true; + } + return false; + } + + /** + * Convert date type to other Type. + * Now only support Date -> String + * TODO: support more types + */ + private static boolean convertDateType(WritableColumnVector oldV, WritableColumnVector newV, DataType newType, int len) { + if (newType instanceof StringType) { + for (int i = 0; i < len; i++) { + if (oldV.isNullAt(i)) { + newV.putNull(i); + continue; + } + // to do support rebaseDate + String res = org.apache.spark.sql.catalyst.util.DateTimeUtils.toJavaDate(oldV.getInt(i)).toString(); + newV.putByteArray(i, res.getBytes(StandardCharsets.UTF_8)); + } + return true; + } + return false; + } + + /** + * Convert String type to other Type. + * Now only support String -> Decimal/Date. + * Notice: This convert maybe failed!!! 
+ * TODO: support more types + */ + private static boolean convertStringType(WritableColumnVector oldV, WritableColumnVector newV, DataType newType, int len) { + if (newType instanceof DateType || newType instanceof DecimalType) { + for (int i = 0; i < len; i++) { + if (oldV.isNullAt(i)) { + newV.putNull(i); + continue; + } + // to do support rebaseDate + if (newType instanceof DateType) { + int days = org.apache.spark.sql.catalyst.util.DateTimeUtils.fromJavaDate(Date.valueOf(oldV.getUTF8String(i).toString())); + newV.putInt(i, days); + } else if (newType instanceof DecimalType) { + DecimalType decimalType = (DecimalType) newType; + java.math.BigDecimal bigDecimal = new java.math.BigDecimal(oldV.getUTF8String(i).toString().trim()); + Decimal sparkDecimal = Decimal.apply(bigDecimal); + sparkDecimal.changePrecision(decimalType.precision(), decimalType.scale()); + newV.putDecimal(i, sparkDecimal, decimalType.precision()); + } + } + return true; + } + return false; + } + + public static boolean convertColumnVectorType(WritableColumnVector oldV, WritableColumnVector newV, int len) { + if (len == 0 || oldV == null || newV == null) { + return false; + } + DataType oldType = oldV.dataType(); // old colType eg: floatType + DataType newType = newV.dataType(); // new colType eg: doubleType + if (oldV != null && newType != null) { + if (oldType instanceof BooleanType) { + return false; + } else if (oldType instanceof ByteType) { + return false; + } else if (oldType instanceof ShortType) { + return false; + } else if (oldType instanceof IntegerType) { + return convertIntLongType(oldV, newV, newType, len); + } else if (oldType instanceof LongType) { + return convertIntLongType(oldV, newV, newType, len); + } else if (oldType instanceof FloatType) { + return convertFloatType(oldV, newV, newType, len); + } else if (oldType instanceof DoubleType) { + return convertDoubleType(oldV, newV, newType, len); + } else if (oldType instanceof StringType) { + return convertStringType(oldV, 
newV, newType, len); + } else if (oldType instanceof BinaryType) { + return false; + } else if (oldType instanceof DecimalType) { + return convertDecimalType(oldV, newV, newType, len); + } else if (oldType instanceof DateType) { + return convertDateType(oldV, newV, newType, len); + } else if (oldType instanceof TimestampType) { + return false; + } else { + throw new UnsupportedOperationException("Datatype not supported " + oldV); + } + } + return false; + } +} + diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java index 1659fe016ca1d..9c2f37d56a509 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java @@ -30,17 +30,22 @@ import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.io.HoodieKeyLookupResult; import org.apache.hudi.table.HoodieTable; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.Partitioner; import org.apache.spark.api.java.JavaRDD; -import scala.Tuple2; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import scala.Tuple2; + +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; +import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS; + /** * Helper for {@link HoodieBloomIndex} containing Spark-specific logic. 
*/ @@ -75,7 +80,9 @@ public HoodiePairData findMatchingFilesForRecor + config.getBloomIndexParallelism() + "}"); JavaRDD> keyLookupResultRDD; - if (config.isMetadataBloomFilterIndexEnabled()) { + if (config.getBloomIndexUseMetadata() + && getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig()) + .contains(BLOOM_FILTERS.getPartitionPath())) { // Step 1: Sort by file id JavaRDD> sortedFileIdAndKeyPairs = fileComparisonsRDD.sortBy(Tuple2::_1, true, joinParallelism); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/columnstats/ColumnStatsIndexHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/columnstats/ColumnStatsIndexHelper.java deleted file mode 100644 index b98893344b8ec..0000000000000 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/columnstats/ColumnStatsIndexHelper.java +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hudi.index.columnstats; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.model.HoodieColumnRangeMetadata; -import org.apache.hudi.common.model.HoodieFileFormat; -import org.apache.hudi.common.util.BaseFileUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.ParquetUtils; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.exception.HoodieException; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.SparkContext; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.Row$; -import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.types.BinaryType; -import org.apache.spark.sql.types.BooleanType; -import org.apache.spark.sql.types.ByteType; -import org.apache.spark.sql.types.DataType; -import org.apache.spark.sql.types.DateType; -import org.apache.spark.sql.types.DecimalType; -import org.apache.spark.sql.types.DoubleType; -import org.apache.spark.sql.types.FloatType; -import org.apache.spark.sql.types.IntegerType; -import org.apache.spark.sql.types.LongType; -import org.apache.spark.sql.types.LongType$; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.ShortType; -import org.apache.spark.sql.types.StringType; -import org.apache.spark.sql.types.StringType$; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; -import org.apache.spark.sql.types.StructType$; -import org.apache.spark.sql.types.TimestampType; -import org.apache.spark.util.SerializableConfiguration; -import scala.collection.JavaConversions; - -import javax.annotation.Nonnull; -import java.io.IOException; -import 
java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.UUID; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - -import static org.apache.hudi.util.DataTypeUtils.areCompatible; - -public class ColumnStatsIndexHelper { - - private static final Logger LOG = LogManager.getLogger(ColumnStatsIndexHelper.class); - - private static final String SPARK_JOB_DESCRIPTION = "spark.job.description"; - - private static final String COLUMN_STATS_INDEX_FILE_COLUMN_NAME = "file"; - private static final String COLUMN_STATS_INDEX_MIN_VALUE_STAT_NAME = "minValue"; - private static final String COLUMN_STATS_INDEX_MAX_VALUE_STAT_NAME = "maxValue"; - private static final String COLUMN_STATS_INDEX_NUM_NULLS_STAT_NAME = "num_nulls"; - - public static String getMinColumnNameFor(String colName) { - return composeZIndexColName(colName, COLUMN_STATS_INDEX_MIN_VALUE_STAT_NAME); - } - - public static String getMaxColumnNameFor(String colName) { - return composeZIndexColName(colName, COLUMN_STATS_INDEX_MAX_VALUE_STAT_NAME); - } - - public static String getNumNullsColumnNameFor(String colName) { - return composeZIndexColName(colName, COLUMN_STATS_INDEX_NUM_NULLS_STAT_NAME); - } - - /** - * Parse min/max statistics from Parquet footers for provided columns and composes column-stats - * index table in the following format with 3 statistics denominated for each - * linear/Z-curve/Hilbert-curve-ordered column. For ex, if original table contained - * column {@code A}: - * - *
    -   * +---------------------------+------------+------------+-------------+
    -   * |          file             | A_minValue | A_maxValue | A_num_nulls |
    -   * +---------------------------+------------+------------+-------------+
    -   * | one_base_file.parquet     |          1 |         10 |           0 |
    -   * | another_base_file.parquet |        -10 |          0 |           5 |
    -   * +---------------------------+------------+------------+-------------+
    -   * 
    - * - * NOTE: Currently {@link TimestampType} is not supported, since Parquet writer - * does not support statistics for it. - * - * TODO leverage metadata table after RFC-27 lands - * @VisibleForTesting - * - * @param sparkSession encompassing Spark session - * @param baseFilesPaths list of base-files paths to be sourced for column-stats index - * @param orderedColumnSchemas target ordered columns - * @return Spark's {@link Dataset} holding an index table - */ - @Nonnull - public static Dataset buildColumnStatsTableFor( - @Nonnull SparkSession sparkSession, - @Nonnull List baseFilesPaths, - @Nonnull List orderedColumnSchemas - ) { - SparkContext sc = sparkSession.sparkContext(); - JavaSparkContext jsc = new JavaSparkContext(sc); - - SerializableConfiguration serializableConfiguration = new SerializableConfiguration(sc.hadoopConfiguration()); - int numParallelism = (baseFilesPaths.size() / 3 + 1); - List> colMinMaxInfos; - String previousJobDescription = sc.getLocalProperty(SPARK_JOB_DESCRIPTION); - try { - jsc.setJobDescription("Listing parquet column statistics"); - colMinMaxInfos = - jsc.parallelize(baseFilesPaths, numParallelism) - .mapPartitions(paths -> { - ParquetUtils utils = (ParquetUtils) BaseFileUtils.getInstance(HoodieFileFormat.PARQUET); - Iterable iterable = () -> paths; - return StreamSupport.stream(iterable.spliterator(), false) - .flatMap(path -> - utils.readRangeFromParquetMetadata( - serializableConfiguration.value(), - new Path(path), - orderedColumnSchemas.stream() - .map(StructField::name) - .collect(Collectors.toList()) - ) - .stream() - ) - .iterator(); - }) - .collect(); - } finally { - jsc.setJobDescription(previousJobDescription); - } - - // Group column's metadata by file-paths of the files it belongs to - Map>> filePathToColumnMetadataMap = - colMinMaxInfos.stream() - .collect(Collectors.groupingBy(HoodieColumnRangeMetadata::getFilePath)); - - JavaRDD allMetaDataRDD = - jsc.parallelize(new 
ArrayList<>(filePathToColumnMetadataMap.values()), 1) - .map(fileColumnsMetadata -> { - int colSize = fileColumnsMetadata.size(); - if (colSize == 0) { - return null; - } - - String filePath = fileColumnsMetadata.get(0).getFilePath(); - - List indexRow = new ArrayList<>(); - - // First columns of the Z-index's row is target file-path - indexRow.add(filePath); - - // For each column - orderedColumnSchemas.forEach(colSchema -> { - String colName = colSchema.name(); - - HoodieColumnRangeMetadata colMetadata = - fileColumnsMetadata.stream() - .filter(s -> s.getColumnName().trim().equalsIgnoreCase(colName)) - .findFirst() - .orElse(null); - - DataType colType = colSchema.dataType(); - if (colMetadata == null || colType == null) { - throw new HoodieException(String.format("Cannot collect min/max statistics for column (%s)", colSchema)); - } - - Pair minMaxValue = fetchMinMaxValues(colType, colMetadata); - - indexRow.add(minMaxValue.getLeft()); // min - indexRow.add(minMaxValue.getRight()); // max - indexRow.add(colMetadata.getNullCount()); - }); - - return Row$.MODULE$.apply(JavaConversions.asScalaBuffer(indexRow)); - }) - .filter(Objects::nonNull); - - StructType indexSchema = composeIndexSchema(orderedColumnSchemas); - - return sparkSession.createDataFrame(allMetaDataRDD, indexSchema); - } - - /** - *

    - * Updates state of the column-stats index by: - *

      - *
    1. Updating column-stats index with statistics for {@code sourceBaseFiles}, - * collecting corresponding column statistics from Parquet footers
    2. - *
    3. Merging newly built column-stats index table with the most recent one (if present - * and not preempted)
    4. - *
    5. Cleans up any residual index tables, that weren't cleaned up before
    6. - *
    - * - * @param sparkSession encompassing Spark session - * @param sourceTableSchema instance of {@link StructType} bearing source table's writer's schema - * @param sourceBaseFiles list of base-files to be indexed - * @param orderedCols target ordered columns - * @param indexFolderPath col-stats index folder path - * @param commitTime current operation commit instant - * @param completedCommits all previously completed commit instants - */ - public static void updateColumnStatsIndexFor( - @Nonnull SparkSession sparkSession, - @Nonnull StructType sourceTableSchema, - @Nonnull List sourceBaseFiles, - @Nonnull List orderedCols, - @Nonnull String indexFolderPath, - @Nonnull String commitTime, - @Nonnull List completedCommits - ) { - FileSystem fs = FSUtils.getFs(indexFolderPath, sparkSession.sparkContext().hadoopConfiguration()); - - // Compose new col-stats index table for the given source base files - Dataset newColStatsIndexDf = - buildColumnStatsTableFor( - sparkSession, - sourceBaseFiles, - orderedCols.stream() - .map(col -> sourceTableSchema.fields()[sourceTableSchema.fieldIndex(col)]) - .collect(Collectors.toList()) - ); - - try { - // - // Column Stats Index has the following folder structure: - // - // .hoodie/ - // ├── .colstatsindex/ - // │ ├── / - // │ │ ├── .parquet - // │ │ └── ... 
- // - Path newIndexTablePath = new Path(indexFolderPath, commitTime); - - // If index is currently empty (no persisted tables), we simply create one - // using clustering operation's commit instance as it's name - if (!fs.exists(new Path(indexFolderPath))) { - newColStatsIndexDf.repartition(1) - .write() - .format("parquet") - .mode("overwrite") - .save(newIndexTablePath.toString()); - return; - } - - // Filter in all index tables (w/in {@code .zindex} folder) - List allIndexTables = - Arrays.stream( - fs.listStatus(new Path(indexFolderPath)) - ) - .filter(FileStatus::isDirectory) - .map(f -> f.getPath().getName()) - .collect(Collectors.toList()); - - // Compile list of valid index tables that were produced as part - // of previously successfully committed iterations - List validIndexTables = - allIndexTables.stream() - .filter(completedCommits::contains) - .sorted() - .collect(Collectors.toList()); - - List tablesToCleanup = - allIndexTables.stream() - .filter(f -> !completedCommits.contains(f)) - .collect(Collectors.toList()); - - Dataset finalColStatsIndexDf; - - // Before writing out new version of the col-stats-index table we need to merge it - // with the most recent one that were successfully persisted previously - if (validIndexTables.isEmpty()) { - finalColStatsIndexDf = newColStatsIndexDf; - } else { - Path latestIndexTablePath = new Path(indexFolderPath, validIndexTables.get(validIndexTables.size() - 1)); - - Option> existingIndexTableOpt = - tryLoadExistingIndexTable(sparkSession, latestIndexTablePath); - - if (!existingIndexTableOpt.isPresent()) { - finalColStatsIndexDf = newColStatsIndexDf; - } else { - // NOTE: That Parquet schema might deviate from the original table schema (for ex, - // by upcasting "short" to "integer" types, etc), and hence we need to re-adjust it - // prior to merging, since merging might fail otherwise due to schemas incompatibility - finalColStatsIndexDf = - tryMergeMostRecentIndexTableInto( - sparkSession, - 
newColStatsIndexDf, - // Load current most recent col-stats-index table - existingIndexTableOpt.get() - ); - - // Clean up all index tables (after creation of the new index) - tablesToCleanup.addAll(validIndexTables); - } - } - - // Persist new col-stats-index table - finalColStatsIndexDf - .repartition(1) - .write() - .format("parquet") - // NOTE: We intend to potentially overwrite index-table from the previous Clustering - // operation that has failed to commit - .mode("overwrite") - .save(newIndexTablePath.toString()); - - // Clean up residual col-stats-index tables that have might have been dangling since - // previous iterations (due to intermittent failures during previous clean up) - tablesToCleanup.forEach(f -> { - try { - fs.delete(new Path(indexFolderPath, f), true); - } catch (IOException ie) { - // NOTE: Exception is deliberately swallowed to not affect overall clustering operation, - // since failing col-stats-index table will be attempted to be cleaned up upon subsequent - // clustering iteration - LOG.warn(String.format("Failed to cleanup residual col-stats-index table: %s", f), ie); - } - }); - } catch (IOException e) { - LOG.error("Failed to build new col-stats-index table", e); - throw new HoodieException("Failed to build new col-stats-index table", e); - } - } - - @Nonnull - private static Option> tryLoadExistingIndexTable(@Nonnull SparkSession sparkSession, @Nonnull Path indexTablePath) { - try { - Dataset indexTableDataset = sparkSession.read().load(indexTablePath.toUri().toString()); - return Option.of(indexTableDataset); - } catch (Exception e) { - LOG.error(String.format("Failed to load existing Column Stats index table from (%s)", indexTablePath), e); - return Option.empty(); - } - } - - @Nonnull - private static Dataset tryMergeMostRecentIndexTableInto( - @Nonnull SparkSession sparkSession, - @Nonnull Dataset newIndexTableDf, - @Nonnull Dataset existingIndexTableDf - ) { - // NOTE: If new col-stats index table schema is incompatible with 
that one of existing table - // that is most likely due to changing settings of list of Z-ordered columns, that - // occurred since last index table have been persisted. - // - // In that case, we simply drop existing index table and just persist the new one; - // - // Also note that we're checking compatibility of _old_ index-table with new one and that - // COMPATIBILITY OPERATION DOES NOT COMMUTE (ie if A is compatible w/ B, - // B might not necessarily be compatible w/ A) - if (!areCompatible(existingIndexTableDf.schema(), newIndexTableDf.schema())) { - return newIndexTableDf; - } - - String randomSuffix = UUID.randomUUID().toString().replace("-", ""); - - String existingIndexTempTableName = "existingIndexTable_" + randomSuffix; - String newIndexTempTableName = "newIndexTable_" + randomSuffix; - - existingIndexTableDf.registerTempTable(existingIndexTempTableName); - newIndexTableDf.registerTempTable(newIndexTempTableName); - - List newTableColumns = Arrays.asList(newIndexTableDf.schema().fieldNames()); - - // Create merged table by doing full-out join - return sparkSession.sql(createIndexMergeSql(existingIndexTempTableName, newIndexTempTableName, newTableColumns)); - } - - /** - * @VisibleForTesting - */ - @Nonnull - public static StructType composeIndexSchema(@Nonnull List zorderedColumnsSchemas) { - List schema = new ArrayList<>(); - schema.add(new StructField(COLUMN_STATS_INDEX_FILE_COLUMN_NAME, StringType$.MODULE$, true, Metadata.empty())); - zorderedColumnsSchemas.forEach(colSchema -> { - schema.add(composeColumnStatStructType(colSchema.name(), COLUMN_STATS_INDEX_MIN_VALUE_STAT_NAME, colSchema.dataType())); - schema.add(composeColumnStatStructType(colSchema.name(), COLUMN_STATS_INDEX_MAX_VALUE_STAT_NAME, colSchema.dataType())); - schema.add(composeColumnStatStructType(colSchema.name(), COLUMN_STATS_INDEX_NUM_NULLS_STAT_NAME, LongType$.MODULE$)); - }); - return StructType$.MODULE$.apply(schema); - } - - private static StructField 
composeColumnStatStructType(String col, String statName, DataType dataType) { - return new StructField(composeZIndexColName(col, statName), dataType, true, Metadata.empty()); - } - - private static String composeZIndexColName(String col, String statName) { - // TODO add escaping for - return String.format("%s_%s", col, statName); - } - - private static Pair - fetchMinMaxValues( - @Nonnull DataType colType, - @Nonnull HoodieColumnRangeMetadata colMetadata) { - if (colType instanceof IntegerType) { - return Pair.of( - new Integer(colMetadata.getMinValue().toString()), - new Integer(colMetadata.getMaxValue().toString()) - ); - } else if (colType instanceof DoubleType) { - return Pair.of( - new Double(colMetadata.getMinValue().toString()), - new Double(colMetadata.getMaxValue().toString()) - ); - } else if (colType instanceof StringType) { - return Pair.of( - colMetadata.getMinValue().toString(), - colMetadata.getMaxValue().toString()); - } else if (colType instanceof DecimalType) { - return Pair.of( - new BigDecimal(colMetadata.getMinValue().toString()), - new BigDecimal(colMetadata.getMaxValue().toString())); - } else if (colType instanceof DateType) { - return Pair.of( - java.sql.Date.valueOf(colMetadata.getMinValue().toString()), - java.sql.Date.valueOf(colMetadata.getMaxValue().toString())); - } else if (colType instanceof LongType) { - return Pair.of( - new Long(colMetadata.getMinValue().toString()), - new Long(colMetadata.getMaxValue().toString())); - } else if (colType instanceof ShortType) { - return Pair.of( - new Short(colMetadata.getMinValue().toString()), - new Short(colMetadata.getMaxValue().toString())); - } else if (colType instanceof FloatType) { - return Pair.of( - new Float(colMetadata.getMinValue().toString()), - new Float(colMetadata.getMaxValue().toString())); - } else if (colType instanceof BinaryType) { - return Pair.of( - ((ByteBuffer) colMetadata.getMinValue()).array(), - ((ByteBuffer) colMetadata.getMaxValue()).array()); - } else if (colType 
instanceof BooleanType) { - return Pair.of( - Boolean.valueOf(colMetadata.getMinValue().toString()), - Boolean.valueOf(colMetadata.getMaxValue().toString())); - } else if (colType instanceof ByteType) { - return Pair.of( - Byte.valueOf(colMetadata.getMinValue().toString()), - Byte.valueOf(colMetadata.getMaxValue().toString())); - } else { - throw new HoodieException(String.format("Not support type: %s", colType)); - } - } - - /** - * @VisibleForTesting - */ - @Nonnull - static String createIndexMergeSql( - @Nonnull String originalIndexTable, - @Nonnull String newIndexTable, - @Nonnull List columns - ) { - StringBuilder selectBody = new StringBuilder(); - - for (int i = 0; i < columns.size(); ++i) { - String col = columns.get(i); - String originalTableColumn = String.format("%s.%s", originalIndexTable, col); - String newTableColumn = String.format("%s.%s", newIndexTable, col); - - selectBody.append( - // NOTE: We prefer values from the new index table, and fallback to the original one only - // in case it does not contain statistics for the given file path - String.format("if (%s is null, %s, %s) AS %s", newTableColumn, originalTableColumn, newTableColumn, col) - ); - - if (i < columns.size() - 1) { - selectBody.append(", "); - } - } - - return String.format( - "SELECT %s FROM %s FULL JOIN %s ON %s = %s", - selectBody, - originalIndexTable, - newIndexTable, - String.format("%s.%s", originalIndexTable, columns.get(0)), - String.format("%s.%s", newIndexTable, columns.get(0)) - ); - } -} diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowParquetWriter.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowParquetWriter.java index 6c2e8c50cdf61..7e64d83879f05 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowParquetWriter.java +++ 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowParquetWriter.java @@ -56,7 +56,7 @@ public HoodieInternalRowParquetWriter(Path file, HoodieRowParquetConfig parquetC @Override public boolean canWrite() { - return fs.getBytesWritten(file) < maxFileSize; + return getDataSize() < maxFileSize; } @Override diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java index 5cdb2ff68fc63..ce3cd6f09768d 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java @@ -93,7 +93,8 @@ public HoodieRowCreateHandle(HoodieTable table, HoodieWriteConfig writeConfig, S fs, instantTime, new Path(writeConfig.getBasePath()), - FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath)); + FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath), + table.getPartitionMetafileFormat()); partitionMetadata.trySave(taskPartitionId); createMarkerFile(partitionPath, FSUtils.makeDataFileName(this.instantTime, getWriteToken(), this.fileId, table.getBaseFileExtension())); this.fileWriter = createNewFileWriter(path, table, writeConfig, structType); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java index 80b94edf7ecd6..7d94b2d4f53f1 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java @@ -25,8 +25,11 @@ import 
org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.metrics.Registry; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.util.CommitUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieWriteConfig; @@ -43,6 +46,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetadataWriter { @@ -136,21 +140,29 @@ protected void commit(String instantTime, Map entry.getTimestamp().equals(instantTime)).lastInstant().get(); - HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant); - metadataMetaClient.reloadActiveTimeline(); + Option alreadyCompletedInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().filter(entry -> entry.getTimestamp().equals(instantTime)).lastInstant(); + if (alreadyCompletedInstant.isPresent()) { + // this code path refers to a re-attempted commit that got committed to metadata table, but failed in datatable. + // for eg, lets say compaction c1 on 1st attempt succeeded in metadata table and failed before committing to datatable. + // when retried again, data table will first rollback pending compaction. these will be applied to metadata table, but all changes + // are upserts to metadata table and so only a new delta commit will be created. + // once rollback is complete, compaction will be retried again, which will eventually hit this code block where the respective commit is + // already part of completed commit. 
So, we have to manually remove the completed instant and proceed. + // and it is for the same reason we enabled withAllowMultiWriteOnSameInstant for metadata table. + HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant.get()); + metadataMetaClient.reloadActiveTimeline(); + } + // If the alreadyCompletedInstant is empty, that means there is a requested or inflight + // instant with the same instant time. This happens for data table clean action which + // reuses the same instant time without rollback first. It is a no-op here as the + // clean plan is the same, so we don't need to delete the requested and inflight instant + // files in the active timeline. } + List statuses = writeClient.upsertPreppedRecords(preppedRecordRDD, instantTime).collect(); statuses.forEach(writeStatus -> { if (writeStatus.hasErrors()) { @@ -169,4 +181,16 @@ protected void commit(String instantTime, Map m.updateSizeMetrics(metadataMetaClient, metadata)); } + + @Override + public void deletePartitions(String instantTime, List partitions) { + List partitionsToDrop = partitions.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toList()); + LOG.info("Deleting Metadata Table partitions: " + partitionsToDrop); + + try (SparkRDDWriteClient writeClient = new SparkRDDWriteClient(engineContext, metadataWriteConfig, true)) { + String actionType = CommitUtils.getCommitActionType(WriteOperationType.DELETE_PARTITION, HoodieTableType.MERGE_ON_READ); + writeClient.startCommitWithTime(instantTime, actionType); + writeClient.deletePartitions(partitionsToDrop, instantTime); + } + } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java index 8f5211212253a..a88ca65c35a94 100644 --- 
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java @@ -18,12 +18,12 @@ package org.apache.hudi.table; -import org.apache.hudi.AvroConversionUtils; -import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; +import org.apache.hudi.avro.model.HoodieIndexCommitMetadata; +import org.apache.hudi.avro.model.HoodieIndexPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; @@ -38,23 +38,20 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.exception.HoodieUpsertException; -import org.apache.hudi.index.columnstats.ColumnStatsIndexHelper; import org.apache.hudi.io.HoodieCreateHandle; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieSortedMergeHandle; import org.apache.hudi.keygen.BaseKeyGenerator; import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory; +import 
org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata; import org.apache.hudi.table.action.bootstrap.SparkBootstrapCommitActionExecutor; @@ -73,26 +70,21 @@ import org.apache.hudi.table.action.commit.SparkInsertPreppedCommitActionExecutor; import org.apache.hudi.table.action.commit.SparkUpsertCommitActionExecutor; import org.apache.hudi.table.action.commit.SparkUpsertPreppedCommitActionExecutor; +import org.apache.hudi.table.action.index.RunIndexActionExecutor; +import org.apache.hudi.table.action.index.ScheduleIndexActionExecutor; import org.apache.hudi.table.action.restore.CopyOnWriteRestoreActionExecutor; import org.apache.hudi.table.action.rollback.BaseRollbackPlanActionExecutor; import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor; import org.apache.hudi.table.action.rollback.RestorePlanActionExecutor; import org.apache.hudi.table.action.savepoint.SavepointActionExecutor; - -import org.apache.avro.Schema; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import javax.annotation.Nonnull; - import java.io.IOException; -import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; /** * Implementation of a very heavily read-optimized Hoodie Table where, all data is stored in base files, with @@ -172,63 +164,6 @@ public HoodieWriteMetadata> insertOverwriteTable(HoodieE return new SparkInsertOverwriteTableCommitActionExecutor(context, config, this, instantTime, records).execute(); } - @Override - public void updateMetadataIndexes(@Nonnull HoodieEngineContext context, @Nonnull List stats, @Nonnull String instantTime) throws Exception { - updateColumnsStatsIndex(context, stats, instantTime); - } - - private void updateColumnsStatsIndex( - @Nonnull 
HoodieEngineContext context, - @Nonnull List updatedFilesStats, - @Nonnull String instantTime - ) throws Exception { - String sortColsList = config.getClusteringSortColumns(); - String basePath = metaClient.getBasePath(); - String indexPath = metaClient.getColumnStatsIndexPath(); - - List touchedFiles = - updatedFilesStats.stream() - .map(s -> new Path(basePath, s.getPath()).toString()) - .collect(Collectors.toList()); - - if (touchedFiles.isEmpty() || StringUtils.isNullOrEmpty(sortColsList) || StringUtils.isNullOrEmpty(indexPath)) { - return; - } - - LOG.info(String.format("Updating column-statistics index table (%s)", indexPath)); - - List sortCols = Arrays.stream(sortColsList.split(",")) - .map(String::trim) - .collect(Collectors.toList()); - - HoodieSparkEngineContext sparkEngineContext = (HoodieSparkEngineContext)context; - - // Fetch table schema to appropriately construct col-stats index schema - Schema tableWriteSchema = - HoodieAvroUtils.createHoodieWriteSchema( - new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields() - ); - - List completedCommits = - metaClient.getCommitsTimeline() - .filterCompletedInstants() - .getInstants() - .map(HoodieInstant::getTimestamp) - .collect(Collectors.toList()); - - ColumnStatsIndexHelper.updateColumnStatsIndexFor( - sparkEngineContext.getSqlContext().sparkSession(), - AvroConversionUtils.convertAvroSchemaToStructType(tableWriteSchema), - touchedFiles, - sortCols, - indexPath, - instantTime, - completedCommits - ); - - LOG.info(String.format("Successfully updated column-statistics index at instant (%s)", instantTime)); - } - @Override public Option scheduleCompaction(HoodieEngineContext context, String instantTime, Option> extraMetadata) { throw new HoodieNotSupportedException("Compaction is not supported on a CopyOnWrite table"); @@ -346,6 +281,16 @@ public HoodieRollbackMetadata rollback(HoodieEngineContext context, String rollb deleteInstants, skipLocking).execute(); } + @Override + public Option 
scheduleIndexing(HoodieEngineContext context, String indexInstantTime, List partitionsToIndex) { + return new ScheduleIndexActionExecutor<>(context, config, this, indexInstantTime, partitionsToIndex).execute(); + } + + @Override + public Option index(HoodieEngineContext context, String indexInstantTime) { + return new RunIndexActionExecutor<>(context, config, this, indexInstantTime).execute(); + } + @Override public HoodieSavepointMetadata savepoint(HoodieEngineContext context, String instantToSavepoint, String user, String comment) { return new SavepointActionExecutor<>(context, config, this, instantToSavepoint, user, comment).execute(); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java index ce14d43cfc6e9..71efe89a055e1 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java @@ -113,6 +113,9 @@ public Option getMetad // existence after the creation is needed. 
final HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create( context.getHadoopConf().get(), config, context, actionMetadata, Option.of(triggeringInstantTimestamp)); + // even with metadata enabled, some index could have been disabled + // delete metadata partitions corresponding to such indexes + deleteMetadataIndexIfNecessary(); try { if (isMetadataTableExists || metaClient.getFs().exists(new Path( HoodieTableMetadata.getMetadataTableBasePath(metaClient.getBasePath())))) { @@ -122,6 +125,8 @@ public Option getMetad } catch (IOException e) { throw new HoodieMetadataException("Checking existence of metadata table failed", e); } + } else { + maybeDeleteMetadataTable(); } return Option.empty(); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java index b31eb7b96d948..149aef03e238a 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java @@ -18,24 +18,32 @@ package org.apache.hudi.table.action.commit; +import java.time.Duration; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; 
import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.data.HoodieJavaPairRDD; +import org.apache.hudi.exception.HoodieDeletePartitionException; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.WorkloadProfile; import org.apache.hudi.table.WorkloadStat; import org.apache.hudi.table.action.HoodieWriteMetadata; -import java.time.Duration; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED; +import static org.apache.hudi.common.table.timeline.HoodieTimeline.REPLACE_COMMIT_ACTION; public class SparkDeletePartitionCommitActionExecutor> extends SparkInsertOverwriteCommitActionExecutor { @@ -50,16 +58,35 @@ public SparkDeletePartitionCommitActionExecutor(HoodieEngineContext context, @Override public HoodieWriteMetadata> execute() { - HoodieTimer timer = new HoodieTimer().startTimer(); - context.setJobStatus(this.getClass().getSimpleName(), "Gather all file ids from all deleting partitions."); - Map> partitionToReplaceFileIds = HoodieJavaPairRDD.getJavaPairRDD(context.parallelize(partitions).distinct() - .mapToPair(partitionPath -> Pair.of(partitionPath, getAllExistingFileIds(partitionPath)))).collectAsMap(); - HoodieWriteMetadata> result = new HoodieWriteMetadata<>(); - result.setPartitionToReplaceFileIds(partitionToReplaceFileIds); - result.setIndexUpdateDuration(Duration.ofMillis(timer.endTimer())); - result.setWriteStatuses(context.emptyHoodieData()); - this.saveWorkloadProfileMetadataToInflight(new WorkloadProfile(Pair.of(new HashMap<>(), new WorkloadStat())), instantTime); - this.commitOnAutoCommit(result); - return result; + try { + HoodieTimer timer = new HoodieTimer().startTimer(); + context.setJobStatus(this.getClass().getSimpleName(), "Gather all file ids from all deleting partitions."); + Map> partitionToReplaceFileIds = 
+ HoodieJavaPairRDD.getJavaPairRDD(context.parallelize(partitions).distinct() + .mapToPair(partitionPath -> Pair.of(partitionPath, getAllExistingFileIds(partitionPath)))).collectAsMap(); + HoodieWriteMetadata> result = new HoodieWriteMetadata<>(); + result.setPartitionToReplaceFileIds(partitionToReplaceFileIds); + result.setIndexUpdateDuration(Duration.ofMillis(timer.endTimer())); + result.setWriteStatuses(context.emptyHoodieData()); + + // created requested + HoodieInstant dropPartitionsInstant = new HoodieInstant(REQUESTED, REPLACE_COMMIT_ACTION, instantTime); + if (!table.getMetaClient().getFs().exists(new Path(table.getMetaClient().getMetaPath(), + dropPartitionsInstant.getFileName()))) { + HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder() + .setOperationType(WriteOperationType.DELETE_PARTITION.name()) + .setExtraMetadata(extraMetadata.orElse(Collections.emptyMap())) + .build(); + table.getMetaClient().getActiveTimeline().saveToPendingReplaceCommit(dropPartitionsInstant, + TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata)); + } + + this.saveWorkloadProfileMetadataToInflight(new WorkloadProfile(Pair.of(new HashMap<>(), new WorkloadStat())), + instantTime); + this.commitOnAutoCommit(result); + return result; + } catch (Exception e) { + throw new HoodieDeletePartitionException("Failed to drop partitions for commit time " + instantTime, e); + } } } diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala index 69005cd75332c..8cb0e239eb51a 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala @@ -17,13 +17,13 @@ */ package org.apache.hudi + import org.apache.avro.Schema.Type import 
org.apache.avro.generic.{GenericRecord, GenericRecordBuilder, IndexedRecord} import org.apache.avro.{AvroRuntimeException, JsonProperties, Schema} import org.apache.hudi.HoodieSparkUtils.sparkAdapter import org.apache.hudi.avro.HoodieAvroUtils import org.apache.spark.rdd.RDD -import org.apache.spark.sql.avro.SchemaConverters import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} @@ -62,10 +62,12 @@ object AvroConversionUtils { * @param rootCatalystType Catalyst [[StructType]] to be transformed into * @return converter accepting Avro payload and transforming it into a Catalyst one (in the form of [[InternalRow]]) */ - def createAvroToInternalRowConverter(rootAvroType: Schema, rootCatalystType: StructType): GenericRecord => Option[InternalRow] = - record => sparkAdapter.createAvroDeserializer(rootAvroType, rootCatalystType) + def createAvroToInternalRowConverter(rootAvroType: Schema, rootCatalystType: StructType): GenericRecord => Option[InternalRow] = { + val deserializer = sparkAdapter.createAvroDeserializer(rootAvroType, rootCatalystType) + record => deserializer .deserialize(record) .map(_.asInstanceOf[InternalRow]) + } /** * Creates converter to transform Catalyst payload into Avro one @@ -76,7 +78,8 @@ object AvroConversionUtils { * @return converter accepting Catalyst payload (in the form of [[InternalRow]]) and transforming it into an Avro one */ def createInternalRowToAvroConverter(rootCatalystType: StructType, rootAvroType: Schema, nullable: Boolean): InternalRow => GenericRecord = { - row => sparkAdapter.createAvroSerializer(rootCatalystType, rootAvroType, nullable) + val serializer = sparkAdapter.createAvroSerializer(rootCatalystType, rootAvroType, nullable) + row => serializer .serialize(row) .asInstanceOf[GenericRecord] } @@ -133,27 +136,36 @@ object AvroConversionUtils { } /** - * - * Returns avro schema from spark StructType. 
- * - * @param structType Dataframe Struct Type. - * @param structName Avro record name. - * @param recordNamespace Avro record namespace. - * @return Avro schema corresponding to given struct type. - */ + * + * Returns avro schema from spark StructType. + * + * @param structType Dataframe Struct Type. + * @param structName Avro record name. + * @param recordNamespace Avro record namespace. + * @return Avro schema corresponding to given struct type. + */ def convertStructTypeToAvroSchema(structType: DataType, structName: String, recordNamespace: String): Schema = { - getAvroSchemaWithDefaults(SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace), structType) + val schemaConverters = sparkAdapter.getAvroSchemaConverters + val avroSchema = schemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace) + getAvroSchemaWithDefaults(avroSchema, structType) + } + + def convertAvroSchemaToStructType(avroSchema: Schema): StructType = { + val schemaConverters = sparkAdapter.getAvroSchemaConverters + schemaConverters.toSqlType(avroSchema) match { + case (dataType, _) => dataType.asInstanceOf[StructType] + } } /** - * - * Method to add default value of null to nullable fields in given avro schema - * - * @param schema input avro schema - * @return Avro schema with null default set to nullable fields - */ + * + * Method to add default value of null to nullable fields in given avro schema + * + * @param schema input avro schema + * @return Avro schema with null default set to nullable fields + */ def getAvroSchemaWithDefaults(schema: Schema, dataType: DataType): Schema = { schema.getType match { @@ -202,21 +214,6 @@ object AvroConversionUtils { } } - def convertAvroSchemaToStructType(avroSchema: Schema): StructType = { - SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType] - } - - def buildAvroRecordBySchema(record: IndexedRecord, - requiredSchema: Schema, - requiredPos: Seq[Int], - recordBuilder: 
GenericRecordBuilder): GenericRecord = { - val requiredFields = requiredSchema.getFields.asScala - assert(requiredFields.length == requiredPos.length) - val positionIterator = requiredPos.iterator - requiredFields.foreach(f => recordBuilder.set(f, record.get(positionIterator.next()))) - recordBuilder.build() - } - def getAvroRecordNameAndNamespace(tableName: String): (String, String) = { val name = HoodieAvroUtils.sanitizeName(tableName) (s"${name}_record", s"hoodie.${name}") diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieConversionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieConversionUtils.scala index eaaf82182a77e..547c6aed628cc 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieConversionUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieConversionUtils.scala @@ -18,6 +18,8 @@ package org.apache.hudi +import org.apache.hudi.common.config.TypedProperties + object HoodieConversionUtils { def toJavaOption[T](opt: Option[T]): org.apache.hudi.common.util.Option[T] = @@ -26,4 +28,10 @@ object HoodieConversionUtils { def toScalaOption[T](opt: org.apache.hudi.common.util.Option[T]): Option[T] = if (opt.isPresent) Some(opt.get) else None + def toProperties(params: Map[String, String]): TypedProperties = { + val props = new TypedProperties() + params.foreach(kv => props.setProperty(kv._1, kv._2)) + props + } + } diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala index cce6eacb03d73..57eb32fce3623 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala @@ -31,14 +31,17 @@ import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory import 
org.apache.hudi.keygen.{BaseKeyGenerator, CustomAvroKeyGenerator, CustomKeyGenerator, KeyGenerator} import org.apache.spark.SPARK_VERSION import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, Literal} -import org.apache.spark.sql.execution.datasources.{FileStatusCache, InMemoryFileIndex} import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{StringType, StructField, StructType} -import org.apache.spark.sql.{DataFrame, SparkSession} - import java.util.Properties + +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter +import org.apache.hudi.internal.schema.utils.InternalSchemaUtils + import scala.collection.JavaConverters._ object HoodieSparkUtils extends SparkAdapterSupport { @@ -55,6 +58,10 @@ object HoodieSparkUtils extends SparkAdapterSupport { def gteqSpark3_2: Boolean = SPARK_VERSION > "3.2" + def gteqSpark3_1: Boolean = SPARK_VERSION > "3.1" + + def gteqSpark3_1_3: Boolean = SPARK_VERSION >= "3.1.3" + def getMetaSchema: StructType = { StructType(HoodieRecord.HOODIE_META_COLUMNS.asScala.map(col => { StructField(col, StringType, nullable = true) @@ -302,17 +309,25 @@ object HoodieSparkUtils extends SparkAdapterSupport { AttributeReference(columnName, field.get.dataType, field.get.nullable)() } - def getRequiredSchema(tableAvroSchema: Schema, requiredColumns: Array[String]): (Schema, StructType) = { - // First get the required avro-schema, then convert the avro-schema to spark schema. - val name2Fields = tableAvroSchema.getFields.asScala.map(f => f.name() -> f).toMap - // Here have to create a new Schema.Field object - // to prevent throwing exceptions like "org.apache.avro.AvroRuntimeException: Field already used". 
- val requiredFields = requiredColumns.map(c => name2Fields(c)) - .map(f => new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal(), f.order())).toList - val requiredAvroSchema = Schema.createRecord(tableAvroSchema.getName, tableAvroSchema.getDoc, - tableAvroSchema.getNamespace, tableAvroSchema.isError, requiredFields.asJava) - val requiredStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(requiredAvroSchema) - (requiredAvroSchema, requiredStructSchema) + def getRequiredSchema(tableAvroSchema: Schema, requiredColumns: Array[String], internalSchema: InternalSchema = InternalSchema.getEmptyInternalSchema): (Schema, StructType, InternalSchema) = { + if (internalSchema.isEmptySchema || requiredColumns.isEmpty) { + // First get the required avro-schema, then convert the avro-schema to spark schema. + val name2Fields = tableAvroSchema.getFields.asScala.map(f => f.name() -> f).toMap + // Here have to create a new Schema.Field object + // to prevent throwing exceptions like "org.apache.avro.AvroRuntimeException: Field already used". 
+ val requiredFields = requiredColumns.map(c => name2Fields(c)) + .map(f => new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal(), f.order())).toList + val requiredAvroSchema = Schema.createRecord(tableAvroSchema.getName, tableAvroSchema.getDoc, + tableAvroSchema.getNamespace, tableAvroSchema.isError, requiredFields.asJava) + val requiredStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(requiredAvroSchema) + (requiredAvroSchema, requiredStructSchema, internalSchema) + } else { + // now we support nested project + val prunedInternalSchema = InternalSchemaUtils.pruneInternalSchema(internalSchema, requiredColumns.toList.asJava) + val requiredAvroSchema = AvroInternalSchemaConverter.convert(prunedInternalSchema, tableAvroSchema.getName) + val requiredStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(requiredAvroSchema) + (requiredAvroSchema, requiredStructSchema, prunedInternalSchema) + } } def toAttribute(tableSchema: StructType): Seq[AttributeReference] = { diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkAdapterSupport.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkAdapterSupport.scala index fb6a5813ab9e0..16d9253ad6093 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkAdapterSupport.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkAdapterSupport.scala @@ -27,10 +27,10 @@ import org.apache.spark.sql.hudi.SparkAdapter trait SparkAdapterSupport { lazy val sparkAdapter: SparkAdapter = { - val adapterClass = if (HoodieSparkUtils.gteqSpark3_2) { + val adapterClass = if (HoodieSparkUtils.isSpark3_2) { "org.apache.spark.sql.adapter.Spark3_2Adapter" } else if (HoodieSparkUtils.isSpark3_0 || HoodieSparkUtils.isSpark3_1) { - "org.apache.spark.sql.adapter.Spark3Adapter" + "org.apache.spark.sql.adapter.Spark3_1Adapter" } else { "org.apache.spark.sql.adapter.Spark2Adapter" } diff --git 
a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala new file mode 100644 index 0000000000000..fe30f61b92981 --- /dev/null +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, SubqueryExpression} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, LogicalPlan} +import org.apache.spark.sql.types.StructType + +trait HoodieCatalystExpressionUtils { + + /** + * Parses and resolves expression against the attributes of the given table schema. + * + * For example: + *
    +   * ts > 1000 and ts <= 1500
    +   * 
    + * will be resolved as + *
    +   * And(GreaterThan(ts#590L > 1000), LessThanOrEqual(ts#590L <= 1500))
    +   * 
    + * + * Where
    ts
    is a column of the provided [[tableSchema]] + * + * @param spark spark session + * @param exprString string representation of the expression to parse and resolve + * @param tableSchema table schema encompassing attributes to resolve against + * @return Resolved filter expression + */ + def resolveExpr(spark: SparkSession, exprString: String, tableSchema: StructType): Expression = { + val expr = spark.sessionState.sqlParser.parseExpression(exprString) + resolveExpr(spark, expr, tableSchema) + } + + /** + * Resolves provided expression (unless already resolved) against the attributes of the given table schema. + * + * For example: + *
    +   * ts > 1000 and ts <= 1500
    +   * 
    + * will be resolved as + *
    +   * And(GreaterThan(ts#590L > 1000), LessThanOrEqual(ts#590L <= 1500))
    +   * 
    + * + * Where
    ts
    is a column of the provided [[tableSchema]] + * + * @param spark spark session + * @param expr Catalyst expression to be resolved (if not yet) + * @param tableSchema table schema encompassing attributes to resolve against + * @return Resolved filter expression + */ + def resolveExpr(spark: SparkSession, expr: Expression, tableSchema: StructType): Expression = { + val analyzer = spark.sessionState.analyzer + val schemaFields = tableSchema.fields + + val resolvedExpr = { + val plan: LogicalPlan = Filter(expr, LocalRelation(schemaFields.head, schemaFields.drop(1): _*)) + analyzer.execute(plan).asInstanceOf[Filter].condition + } + + if (!hasUnresolvedRefs(resolvedExpr)) { + resolvedExpr + } else { + throw new IllegalStateException("unresolved attribute") + } + } + + /** + * Split the given predicates into two sequence predicates: + * - predicates that references partition columns only(and involves no sub-query); + * - other predicates. + * + * @param sparkSession The spark session + * @param predicates The predicates to be split + * @param partitionColumns The partition columns + * @return (partitionFilters, dataFilters) + */ + def splitPartitionAndDataPredicates(sparkSession: SparkSession, + predicates: Array[Expression], + partitionColumns: Array[String]): (Array[Expression], Array[Expression]) = { + // Validates that the provided names both resolve to the same entity + val resolvedNameEquals = sparkSession.sessionState.analyzer.resolver + + predicates.partition(expr => { + // Checks whether given expression only references partition columns(and involves no sub-query) + expr.references.forall(r => partitionColumns.exists(resolvedNameEquals(r.name, _))) && + !SubqueryExpression.hasSubquery(expr) + }) + } + + /** + * Matches an expression iff + * + *
      + *
    1. It references exactly one [[AttributeReference]]
    2. + *
    3. It contains only whitelisted transformations that preserve ordering of the source column [1]
    4. + *
    + * + * [1] Preserving ordering is defined as following: transformation T is defined as ordering preserving in case + * values of the source column A values being ordered as a1, a2, a3 ..., will map into column B = T(A) which + * will keep the same ordering b1, b2, b3, ... with b1 = T(a1), b2 = T(a2), ... + */ + def tryMatchAttributeOrderingPreservingTransformation(expr: Expression): Option[AttributeReference] + + private def hasUnresolvedRefs(resolvedExpr: Expression): Boolean = + resolvedExpr.collectFirst { + case _: UnresolvedAttribute | _: UnresolvedFunction => true + }.isDefined +} diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSchemaConverters.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSchemaConverters.scala new file mode 100644 index 0000000000000..9b068afac83d2 --- /dev/null +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSchemaConverters.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.sql.types.DataType + +/** + * Allows to convert Avro schema into Spark's Catalyst one + */ +trait HoodieAvroSchemaConverters { + + def toSqlType(avroSchema: Schema): (DataType, Boolean) + + def toAvroType(catalystType: DataType, nullable: Boolean, recordName: String, nameSpace: String = ""): Schema + +} diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala index e41a9c1c8e326..d8ed173547851 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.hudi import org.apache.avro.Schema import org.apache.hudi.client.utils.SparkRowSerDe -import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSerializer} +import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSchemaConverters, HoodieAvroSerializer} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -32,16 +32,24 @@ import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} import org.apache.spark.sql.execution.datasources.{FilePartition, LogicalRelation, PartitionedFile, SparkParsePartitionUtil} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.{HoodieCatalystExpressionUtils, Row, SparkSession} import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import java.util.Locale /** - * An interface to adapter the difference between spark2 and spark3 
- * in some spark related class. + * Interface adapting discrepancies and incompatibilities between different Spark versions */ trait SparkAdapter extends Serializable { + /** + * Creates instance of [[HoodieCatalystExpressionUtils]] providing for common utils operating + * on Catalyst Expressions + */ + def createCatalystExpressionUtils(): HoodieCatalystExpressionUtils + /** * Creates instance of [[HoodieAvroSerializer]] providing for ability to serialize * Spark's [[InternalRow]] into Avro payloads @@ -54,6 +62,11 @@ trait SparkAdapter extends Serializable { */ def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializer + /** + * Creates instance of [[HoodieAvroSchemaConverters]] allowing to convert b/w Avro and Catalyst schemas + */ + def getAvroSchemaConverters: HoodieAvroSchemaConverters + /** * Create the SparkRowSerDe. */ @@ -157,4 +170,14 @@ trait SparkAdapter extends Serializable { other } } + + /** + * Create customresolutionRule to deal with alter command for hudi. + */ + def createResolveHudiAlterTableCommand(sparkSession: SparkSession): Rule[LogicalPlan] + + /** + * Create hoodie parquet file format. 
+ */ + def createHoodieParquetFileFormat(): Option[ParquetFileFormat] } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java index 3b5393527fd79..f6315eec7d211 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java @@ -20,6 +20,8 @@ import org.apache.hudi.avro.model.HoodieInstantInfo; import org.apache.hudi.avro.model.HoodieRollbackPlan; +import org.apache.hudi.avro.model.HoodieRollbackRequest; +import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieCleaningPolicy; @@ -48,6 +50,9 @@ import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.util.Arrays; import java.util.Collections; @@ -55,10 +60,13 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING; import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -269,11 +277,19 @@ public void testRollbackCommit() throws Exception { } } + private static Stream testFailedRollbackCommitParams() { + return Arrays.stream(new Boolean[][] { + {true, true}, {true, 
false}, {false, true}, {false, false}, + }).map(Arguments::of); + } + /** * Test Cases for effects of rollbacking completed/inflight commits. */ - @Test - public void testFailedRollbackCommit() throws Exception { + @ParameterizedTest + @MethodSource("testFailedRollbackCommitParams") + public void testFailedRollbackCommit( + boolean enableMetadataTable, boolean instantToRollbackExists) throws Exception { // Let's create some commit files and base files final String p1 = "2016/05/01"; final String p2 = "2016/05/02"; @@ -302,21 +318,34 @@ public void testFailedRollbackCommit() throws Exception { put(p3, "id33"); } }; - HoodieTestTable testTable = HoodieTestTable.of(metaClient) - .withPartitionMetaFiles(p1, p2, p3) - .addCommit(commitTime1) - .withBaseFilesInPartitions(partitionAndFileId1) - .addCommit(commitTime2) - .withBaseFilesInPartitions(partitionAndFileId2) - .addInflightCommit(commitTime3) - .withBaseFilesInPartitions(partitionAndFileId3); HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) .withRollbackUsingMarkers(false) + .withMetadataConfig( + HoodieMetadataConfig.newBuilder() + // Column Stats Index is disabled, since these tests construct tables which are + // not valid (empty commit metadata, invalid parquet files) + .withMetadataIndexColumnStats(false) + .enable(enableMetadataTable) + .build() + ) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).build()) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build(); + HoodieTestTable testTable = enableMetadataTable + ? 
HoodieMetadataTestTable.of(metaClient, SparkHoodieBackedTableMetadataWriter.create( + metaClient.getHadoopConf(), config, context)) + : HoodieTestTable.of(metaClient); + + testTable.withPartitionMetaFiles(p1, p2, p3) + .addCommit(commitTime1) + .withBaseFilesInPartitions(partitionAndFileId1) + .addCommit(commitTime2) + .withBaseFilesInPartitions(partitionAndFileId2) + .addInflightCommit(commitTime3) + .withBaseFilesInPartitions(partitionAndFileId3); + try (SparkRDDWriteClient client = getHoodieWriteClient(config)) { // Rollback commit3 @@ -333,8 +362,10 @@ public void testFailedRollbackCommit() throws Exception { // delete rollback completed meta file and retry rollback. FileCreateUtils.deleteRollbackCommit(basePath, rollbackInstant.getTimestamp()); - // recreate actual commit files so that we can retry the rollback - testTable.addInflightCommit(commitTime3).withBaseFilesInPartitions(partitionAndFileId3); + if (instantToRollbackExists) { + // recreate actual commit files if needed + testTable.addInflightCommit(commitTime3).withBaseFilesInPartitions(partitionAndFileId3); + } // retry rolling back the commit again. 
client.rollback(commitTime3); @@ -453,4 +484,107 @@ public void testAutoRollbackInflightCommit() throws Exception { assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3)); } } + + private static Stream testRollbackWithRequestedRollbackPlanParams() { + return Arrays.stream(new Boolean[][] { + {true, true}, {true, false}, {false, true}, {false, false}, + }).map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("testRollbackWithRequestedRollbackPlanParams") + public void testRollbackWithRequestedRollbackPlan(boolean enableMetadataTable, boolean isRollbackPlanCorrupted) throws Exception { + // Let's create some commit files and base files + final String p1 = "2022/04/05"; + final String p2 = "2022/04/06"; + final String commitTime1 = "20220406010101002"; + final String commitTime2 = "20220406020601002"; + final String commitTime3 = "20220406030611002"; + final String rollbackInstantTime = "20220406040611002"; + Map partitionAndFileId1 = new HashMap() { + { + put(p1, "id11"); + put(p2, "id12"); + } + }; + Map partitionAndFileId2 = new HashMap() { + { + put(p1, "id21"); + put(p2, "id22"); + } + }; + Map partitionAndFileId3 = new HashMap() { + { + put(p1, "id31"); + put(p2, "id32"); + } + }; + + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withRollbackUsingMarkers(false) + .withMetadataConfig( + HoodieMetadataConfig.newBuilder() + // Column Stats Index is disabled, since these tests construct tables which are + // not valid (empty commit metadata, invalid parquet files) + .withMetadataIndexColumnStats(false) + .enable(enableMetadataTable) + .build() + ) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).build()) + .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build(); + + HoodieTestTable testTable = enableMetadataTable + ? 
HoodieMetadataTestTable.of(metaClient, SparkHoodieBackedTableMetadataWriter.create( + metaClient.getHadoopConf(), config, context)) + : HoodieTestTable.of(metaClient); + + testTable.withPartitionMetaFiles(p1, p2) + .addCommit(commitTime1) + .withBaseFilesInPartitions(partitionAndFileId1) + .addCommit(commitTime2) + .withBaseFilesInPartitions(partitionAndFileId2) + .addInflightCommit(commitTime3) + .withBaseFilesInPartitions(partitionAndFileId3); + + try (SparkRDDWriteClient client = getHoodieWriteClient(config)) { + if (isRollbackPlanCorrupted) { + // Add a corrupted requested rollback plan + FileCreateUtils.createRequestedRollbackFile(metaClient.getBasePath(), rollbackInstantTime, new byte[] {0, 1, 2}); + } else { + // Add a valid requested rollback plan to roll back commitTime3 + HoodieRollbackPlan rollbackPlan = new HoodieRollbackPlan(); + List rollbackRequestList = partitionAndFileId3.keySet().stream() + .map(partition -> new HoodieRollbackRequest(partition, EMPTY_STRING, EMPTY_STRING, + Collections.singletonList(metaClient.getBasePath() + "/" + partition + "/" + + FileCreateUtils.baseFileName(commitTime3, partitionAndFileId3.get(p1))), + Collections.emptyMap())) + .collect(Collectors.toList()); + rollbackPlan.setRollbackRequests(rollbackRequestList); + rollbackPlan.setInstantToRollback(new HoodieInstantInfo(commitTime3, HoodieTimeline.COMMIT_ACTION)); + FileCreateUtils.createRequestedRollbackFile(metaClient.getBasePath(), rollbackInstantTime, rollbackPlan); + } + + // Rollback commit3 + client.rollback(commitTime3); + assertFalse(testTable.inflightCommitExists(commitTime3)); + assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3)); + assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2)); + + metaClient.reloadActiveTimeline(); + List rollbackInstants = metaClient.getActiveTimeline().getRollbackTimeline().getInstants().collect(Collectors.toList()); + // Corrupted requested rollback plan should be deleted before scheduling a new 
one + assertEquals(rollbackInstants.size(), 1); + HoodieInstant rollbackInstant = rollbackInstants.get(0); + assertTrue(rollbackInstant.isCompleted()); + + if (isRollbackPlanCorrupted) { + // Should create a new rollback instant + assertNotEquals(rollbackInstantTime, rollbackInstant.getTimestamp()); + } else { + // Should reuse the rollback instant + assertEquals(rollbackInstantTime, rollbackInstant.getTimestamp()); + } + } + } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java index 3fb454940bf5d..1cb7bcbfc4fcb 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java @@ -291,7 +291,7 @@ public void testMORTable() throws Exception { } // Rollback to the original schema - client.restoreToInstant("004"); + client.restoreToInstant("004", hoodieWriteConfig.isMetadataTableEnabled()); checkLatestDeltaCommit("004"); // Updates with original schema are now allowed @@ -432,7 +432,7 @@ public void testCopyOnWriteTable() throws Exception { // Revert to the older commit and ensure that the original schema can now // be used for inserts and inserts. 
- client.restoreToInstant("003"); + client.restoreToInstant("003", hoodieWriteConfig.isMetadataTableEnabled()); curTimeline = metaClient.reloadActiveTimeline().getCommitTimeline().filterCompletedInstants(); assertTrue(curTimeline.lastInstant().get().getTimestamp().equals("003")); checkReadRecords("000", numRecords); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java index 70f5e9f3bfd1d..a5926196ea396 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java @@ -29,6 +29,7 @@ import org.apache.hudi.common.testutils.RawTripTestPayload; import org.apache.hudi.common.util.BaseFileUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.io.HoodieCreateHandle; @@ -77,6 +78,7 @@ public void tearDown() throws IOException { private WriteStatus prepareFirstRecordCommit(List recordsStrs) throws IOException { // Create a bunch of records with an old version of schema final HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.avsc"); + config.setValue(HoodieCompactionConfig.PRESERVE_COMMIT_METADATA, "false"); final HoodieSparkTable table = HoodieSparkTable.create(config, context); final List statuses = jsc.parallelize(Arrays.asList(1)).map(x -> { List insertRecords = new ArrayList<>(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index 1c40bc808c0be..34f470eb1b64a 100644 --- 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -18,7 +18,17 @@ package org.apache.hudi.client.functional; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.util.Time; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.avro.model.HoodieCleanMetadata; +import org.apache.hudi.avro.model.HoodieMetadataColumnStats; import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; @@ -31,7 +41,9 @@ import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.metrics.Registry; import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieFileFormat; @@ -42,6 +54,7 @@ import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -66,6 +79,8 @@ import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; import 
org.apache.hudi.common.util.collection.ExternalSpillableMap; +import org.apache.hudi.common.util.hash.ColumnIndexID; +import org.apache.hudi.common.util.hash.PartitionIndexID; import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieIndexConfig; @@ -81,6 +96,7 @@ import org.apache.hudi.metadata.HoodieMetadataMetrics; import org.apache.hudi.metadata.HoodieMetadataPayload; import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.metadata.HoodieTableMetadataUtil; import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.HoodieSparkTable; @@ -89,16 +105,6 @@ import org.apache.hudi.table.upgrade.SparkUpgradeDowngradeHelper; import org.apache.hudi.table.upgrade.UpgradeDowngrade; import org.apache.hudi.testutils.MetadataMergeWriteStatus; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.util.Pair; -import org.apache.hadoop.util.Time; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroSchemaConverter; @@ -113,6 +119,7 @@ import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; +import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; @@ -142,6 +149,10 @@ import static org.apache.hudi.common.model.WriteOperationType.INSERT; import static org.apache.hudi.common.model.WriteOperationType.UPSERT; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; +import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; +import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS; +import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS; +import static org.apache.hudi.metadata.MetadataPartitionType.FILES; import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -150,6 +161,7 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; @Tag("functional") public class TestHoodieBackedMetadata extends TestHoodieMetadataBase { @@ -197,6 +209,182 @@ public void testMetadataTableBootstrap(HoodieTableType tableType, boolean addRol validateMetadata(testTable, true); } + @Test + public void testTurnOffMetadataIndexAfterEnable() throws Exception { + initPath(); + HoodieWriteConfig cfg = getConfigBuilder(TRIP_EXAMPLE_SCHEMA, HoodieIndex.IndexType.BLOOM, HoodieFailedWritesCleaningPolicy.EAGER) + .withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1).withDeleteParallelism(1) + .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) + .build(); + init(COPY_ON_WRITE); + HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc); + // metadata enabled with only FILES partition + try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, cfg)) { + // Insert + String commitTime = "0000001"; + List records = dataGen.generateInserts(commitTime, 20); + client.startCommitWithTime(commitTime); + List writeStatuses = client.insert(jsc.parallelize(records, 1), commitTime).collect(); + 
assertNoWriteErrors(writeStatuses); + + // Upsert + commitTime = "0000002"; + client.startCommitWithTime(commitTime); + records = dataGen.generateUniqueUpdates(commitTime, 10); + writeStatuses = client.upsert(jsc.parallelize(records, 1), commitTime).collect(); + assertNoWriteErrors(writeStatuses); + validateMetadata(client); + } + // check table config + HoodieTableMetaClient.reload(metaClient); + HoodieTableConfig tableConfig = metaClient.getTableConfig(); + assertFalse(tableConfig.getMetadataPartitions().isEmpty()); + assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath())); + assertFalse(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath())); + assertFalse(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath())); + + // enable column stats and run 1 upserts + HoodieWriteConfig cfgWithColStatsEnabled = HoodieWriteConfig.newBuilder() + .withProperties(cfg.getProps()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withProperties(cfg.getMetadataConfig().getProps()) + .withMetadataIndexColumnStats(true) + .build()) + .build(); + try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, cfgWithColStatsEnabled)) { + // Upsert + String commitTime = "0000003"; + client.startCommitWithTime(commitTime); + List records = dataGen.generateUniqueUpdates(commitTime, 10); + List writeStatuses = client.upsert(jsc.parallelize(records, 1), commitTime).collect(); + assertNoWriteErrors(writeStatuses); + validateMetadata(client); + } + // check table config + HoodieTableMetaClient.reload(metaClient); + tableConfig = metaClient.getTableConfig(); + assertFalse(tableConfig.getMetadataPartitions().isEmpty()); + assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath())); + assertTrue(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath())); + 
assertFalse(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath())); + + // disable column stats and run 1 upsert + HoodieWriteConfig cfgWithColStatsDisabled = HoodieWriteConfig.newBuilder() + .withProperties(cfg.getProps()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withProperties(cfg.getMetadataConfig().getProps()) + .withMetadataIndexColumnStats(false) + .build()) + .build(); + + try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, cfgWithColStatsDisabled)) { + // Upsert + String commitTime = "0000004"; + client.startCommitWithTime(commitTime); + List records = dataGen.generateUniqueUpdates(commitTime, 10); + List writeStatuses = client.upsert(jsc.parallelize(records, 1), commitTime).collect(); + assertNoWriteErrors(writeStatuses); + validateMetadata(client); + } + // check table config + HoodieTableMetaClient.reload(metaClient); + tableConfig = metaClient.getTableConfig(); + assertFalse(tableConfig.getMetadataPartitions().isEmpty()); + assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath())); + assertFalse(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath())); + assertFalse(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath())); + + // enable bloom filter as well as column stats and run 1 upsert + HoodieWriteConfig cfgWithBloomFilterEnabled = HoodieWriteConfig.newBuilder() + .withProperties(cfgWithColStatsEnabled.getProps()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withProperties(cfgWithColStatsEnabled.getMetadataConfig().getProps()) + .withMetadataIndexBloomFilter(true) + .build()) + .build(); + + try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, cfgWithBloomFilterEnabled)) { + // Upsert + String commitTime = "0000005"; + client.startCommitWithTime(commitTime); + List records = dataGen.generateUniqueUpdates(commitTime, 10); + List writeStatuses = 
client.upsert(jsc.parallelize(records, 1), commitTime).collect(); + assertNoWriteErrors(writeStatuses); + validateMetadata(client); + } + // check table config + HoodieTableMetaClient.reload(metaClient); + tableConfig = metaClient.getTableConfig(); + assertFalse(tableConfig.getMetadataPartitions().isEmpty()); + assertTrue(getCompletedMetadataPartitions(tableConfig).contains(FILES.getPartitionPath())); + assertTrue(getCompletedMetadataPartitions(tableConfig).contains(COLUMN_STATS.getPartitionPath())); + assertTrue(getCompletedMetadataPartitions(tableConfig).contains(BLOOM_FILTERS.getPartitionPath())); + } + + @Test + public void testTurnOffMetadataTableAfterEnable() throws Exception { + init(COPY_ON_WRITE, true); + String instant1 = "0000001"; + HoodieCommitMetadata hoodieCommitMetadata = doWriteOperationWithMeta(testTable, instant1, INSERT); + + // Simulate the complete data directory including ".hoodie_partition_metadata" file + File metaForP1 = new File(metaClient.getBasePath() + "/p1",".hoodie_partition_metadata"); + File metaForP2 = new File(metaClient.getBasePath() + "/p2",".hoodie_partition_metadata"); + metaForP1.createNewFile(); + metaForP2.createNewFile(); + + // Sync to metadata table + metaClient.reloadActiveTimeline(); + HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); + Option metadataWriter = table.getMetadataWriter(instant1, Option.of(hoodieCommitMetadata)); + validateMetadata(testTable, true); + + assertTrue(metadataWriter.isPresent()); + HoodieTableConfig hoodieTableConfig = + new HoodieTableConfig(this.fs, metaClient.getMetaPath(), writeConfig.getPayloadClass()); + assertFalse(hoodieTableConfig.getMetadataPartitions().isEmpty()); + + // Turn off metadata table + HoodieWriteConfig writeConfig2 = HoodieWriteConfig.newBuilder() + .withProperties(this.writeConfig.getProps()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + .build(); + testTable = HoodieTestTable.of(metaClient); + String 
instant2 = "0000002"; + HoodieCommitMetadata hoodieCommitMetadata2 = doWriteOperationWithMeta(testTable, instant2, INSERT); + metaClient.reloadActiveTimeline(); + HoodieTable table2 = HoodieSparkTable.create(writeConfig2, context, metaClient); + Option metadataWriter2 = table2.getMetadataWriter(instant2, Option.of(hoodieCommitMetadata2)); + assertFalse(metadataWriter2.isPresent()); + + HoodieTableConfig hoodieTableConfig2 = + new HoodieTableConfig(this.fs, metaClient.getMetaPath(), writeConfig2.getPayloadClass()); + assertEquals(Collections.emptyList(), hoodieTableConfig2.getMetadataPartitions()); + // Assert metadata table folder is deleted + assertFalse(metaClient.getFs().exists( + new Path(HoodieTableMetadata.getMetadataTableBasePath(writeConfig2.getBasePath())))); + + // Enable metadata table again and initialize metadata table through + // HoodieTable.getMetadataWriter() function + HoodieWriteConfig writeConfig3 = HoodieWriteConfig.newBuilder() + .withProperties(this.writeConfig.getProps()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) + .build(); + testTable = HoodieTestTable.of(metaClient); + metaClient.reloadActiveTimeline(); + String instant3 = "0000003"; + HoodieCommitMetadata hoodieCommitMetadata3 = doWriteOperationWithMeta(testTable, instant3, INSERT); + metaClient.reloadActiveTimeline(); + HoodieTable table3 = HoodieSparkTable.create(writeConfig3, context, metaClient); + Option metadataWriter3 = table3.getMetadataWriter(instant3, Option.of(hoodieCommitMetadata3)); + validateMetadata(testTable, true); + assertTrue(metadataWriter3.isPresent()); + HoodieTableConfig hoodieTableConfig3 = + new HoodieTableConfig(this.fs, metaClient.getMetaPath(), writeConfig.getPayloadClass()); + assertFalse(hoodieTableConfig3.getMetadataPartitions().isEmpty()); + } + /** * Only valid partition directories are added to the metadata. 
*/ @@ -294,24 +482,29 @@ public void testMetadataTableArchival() throws Exception { AtomicInteger commitTime = new AtomicInteger(1); // trigger 2 regular writes(1 bootstrap commit). just 1 before archival can get triggered. - int i = 1; - for (; i <= 2; i++) { + for (int i = 1; i <= 2; i++) { doWriteOperation(testTable, "000000" + (commitTime.getAndIncrement()), INSERT); } // expected num commits = 1 (bootstrap) + 2 (writes) HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataTableBasePath).build(); HoodieActiveTimeline metadataTimeline = metadataMetaClient.reloadActiveTimeline(); - assertEquals(metadataTimeline.getCommitsTimeline().filterCompletedInstants().countInstants(), 3); + assertEquals(3, metadataTimeline.getCommitsTimeline().filterCompletedInstants().countInstants()); - // trigger a async table service, archival should not kick in, even though conditions are met. + // trigger an async table service, archival should not kick in, even though conditions are met. doCluster(testTable, "000000" + commitTime.getAndIncrement()); metadataTimeline = metadataMetaClient.reloadActiveTimeline(); - assertEquals(metadataTimeline.getCommitsTimeline().filterCompletedInstants().countInstants(), 4); + assertEquals(4, metadataTimeline.getCommitsTimeline().filterCompletedInstants().countInstants()); + + // start the timeline server for MARKERS cleaning up + getHoodieWriteClient(writeConfig); + // trigger a regular write operation. data set timeline archival should kick in. + doWriteOperation(testTable, "000000" + (commitTime.getAndIncrement()), INSERT); + archiveDataTable(writeConfig, HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build()); - // trigger a regular write operation. archival should kick in. + // trigger a regular write operation. metadata timeline archival should kick in. 
doWriteOperation(testTable, "000000" + (commitTime.getAndIncrement()), INSERT); metadataTimeline = metadataMetaClient.reloadActiveTimeline(); - assertEquals(metadataTimeline.getCommitsTimeline().filterCompletedInstants().countInstants(), 3); + assertEquals(4, metadataTimeline.getCommitsTimeline().filterCompletedInstants().countInstants()); } @ParameterizedTest @@ -428,7 +621,6 @@ public void testTableOperationsWithMetadataIndex(HoodieTableType tableType) thro .withMetadataIndexBloomFilterFileGroups(4) .withMetadataIndexColumnStats(true) .withMetadataIndexBloomFilterFileGroups(2) - .withMetadataIndexForAllColumns(true) .build()) .build(); init(tableType, writeConfig); @@ -440,6 +632,73 @@ private void testTableOperationsForMetaIndexImpl(final HoodieWriteConfig writeCo testTableOperationsImpl(engineContext, writeConfig); } + @ParameterizedTest + @EnumSource(HoodieTableType.class) + public void testMetadataTableDeletePartition(HoodieTableType tableType) throws IOException { + initPath(); + int maxCommits = 1; + HoodieWriteConfig cfg = getConfigBuilder(TRIP_EXAMPLE_SCHEMA, HoodieIndex.IndexType.BLOOM, HoodieFailedWritesCleaningPolicy.EAGER) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(maxCommits).build()) + .withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1).withDeleteParallelism(1) + .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) + .build(); + init(tableType); + HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc); + + try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, cfg)) { + // Write 1 (Bulk insert) + String newCommitTime = "0000001"; + List records = dataGen.generateInserts(newCommitTime, 20); + client.startCommitWithTime(newCommitTime); + List writeStatuses = 
client.bulkInsert(jsc.parallelize(records, 1), newCommitTime).collect(); + assertNoWriteErrors(writeStatuses); + validateMetadata(client); + + // Write 2 (upserts) + newCommitTime = "0000002"; + client.startCommitWithTime(newCommitTime); + validateMetadata(client); + + records = dataGen.generateInserts(newCommitTime, 10); + writeStatuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + assertNoWriteErrors(writeStatuses); + + // metadata writer to delete column_stats partition + HoodieBackedTableMetadataWriter metadataWriter = metadataWriter(client); + assertNotNull(metadataWriter, "MetadataWriter should have been initialized"); + metadataWriter.deletePartitions("0000003", Arrays.asList(COLUMN_STATS)); + + HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataTableBasePath).build(); + List metadataTablePartitions = FSUtils.getAllPartitionPaths(engineContext, metadataMetaClient.getBasePath(), false, false); + // partition should be physically deleted + assertEquals(metadataWriter.getEnabledPartitionTypes().size(), metadataTablePartitions.size()); + assertFalse(metadataTablePartitions.contains(COLUMN_STATS.getPartitionPath())); + + Option completedReplaceInstant = metadataMetaClient.reloadActiveTimeline().getCompletedReplaceTimeline().lastInstant(); + assertTrue(completedReplaceInstant.isPresent()); + assertEquals("0000003", completedReplaceInstant.get().getTimestamp()); + + final Map metadataEnabledPartitionTypes = new HashMap<>(); + metadataWriter.getEnabledPartitionTypes().forEach(e -> metadataEnabledPartitionTypes.put(e.getPartitionPath(), e)); + HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metadataMetaClient, metadataMetaClient.getActiveTimeline()); + metadataTablePartitions.forEach(partition -> { + List latestSlices = fsView.getLatestFileSlices(partition).collect(Collectors.toList()); + if (COLUMN_STATS.getPartitionPath().equals(partition)) { + // there 
should not be any file slice in column_stats partition + assertTrue(latestSlices.isEmpty()); + } else { + assertFalse(latestSlices.isEmpty()); + assertTrue(latestSlices.stream().map(FileSlice::getBaseFile).count() + <= metadataEnabledPartitionTypes.get(partition).getFileGroupCount(), "Should have a single latest base file per file group"); + assertTrue(latestSlices.size() + <= metadataEnabledPartitionTypes.get(partition).getFileGroupCount(), "Should have a single latest file slice per file group"); + } + }); + } + } + /** * Tests that virtual key configs are honored in base files after compaction in metadata table. * @@ -479,12 +738,12 @@ public void testVirtualKeysInBaseFiles(boolean populateMetaFields) throws Except HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get(); HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(context.getHadoopConf().get(), new Path(baseFile.getPath()), new CacheConfig(context.getHadoopConf().get())); - List> records = hoodieHFileReader.readAllRecords(); + List records = HoodieHFileReader.readAllRecords(hoodieHFileReader); records.forEach(entry -> { if (populateMetaFields) { - assertNotNull(((GenericRecord) entry.getSecond()).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNotNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); } else { - assertNull(((GenericRecord) entry.getSecond()).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); } }); } @@ -679,7 +938,7 @@ private void verifyMetadataRecordKeyExcludeFromPayloadLogFiles(HoodieTable table // Compaction should not be triggered yet. Let's verify no base file // and few log files available. 
List fileSlices = table.getSliceView() - .getLatestFileSlices(MetadataPartitionType.FILES.getPartitionPath()).collect(Collectors.toList()); + .getLatestFileSlices(FILES.getPartitionPath()).collect(Collectors.toList()); if (fileSlices.isEmpty()) { throw new IllegalStateException("LogFile slices are not available!"); } @@ -720,30 +979,30 @@ private void verifyMetadataRawRecords(HoodieTable table, List log } Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg); - HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema); - - while (logFileReader.hasNext()) { - HoodieLogBlock logBlock = logFileReader.next(); - if (logBlock instanceof HoodieDataBlock) { - try (ClosableIterator recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) { - recordItr.forEachRemaining(indexRecord -> { - final GenericRecord record = (GenericRecord) indexRecord; - if (enableMetaFields) { - // Metadata table records should have meta fields! - assertNotNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); - assertNotNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)); - } else { - // Metadata table records should not have meta fields! 
- assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); - assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)); - } - - final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME)); - assertFalse(key.isEmpty()); - if (enableMetaFields) { - assertTrue(key.equals(String.valueOf(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)))); - } - }); + try (HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema)) { + while (logFileReader.hasNext()) { + HoodieLogBlock logBlock = logFileReader.next(); + if (logBlock instanceof HoodieDataBlock) { + try (ClosableIterator recordItr = ((HoodieDataBlock) logBlock).getRecordIterator()) { + recordItr.forEachRemaining(indexRecord -> { + final GenericRecord record = (GenericRecord) indexRecord; + if (enableMetaFields) { + // Metadata table records should have meta fields! + assertNotNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNotNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)); + } else { + // Metadata table records should not have meta fields! 
+ assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)); + } + + final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME)); + assertFalse(key.isEmpty()); + if (enableMetaFields) { + assertTrue(key.equals(String.valueOf(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)))); + } + }); + } } } } @@ -771,7 +1030,7 @@ private void verifyMetadataMergedRecords(HoodieTableMetaClient metadataMetaClien .withBasePath(metadataMetaClient.getBasePath()) .withLogFilePaths(logFilePaths) .withLatestInstantTime(latestCommitTimestamp) - .withPartition(MetadataPartitionType.FILES.getPartitionPath()) + .withPartition(FILES.getPartitionPath()) .withReaderSchema(schema) .withMaxMemorySizeInBytes(100000L) .withBufferSize(4096) @@ -801,7 +1060,7 @@ private void verifyMetadataMergedRecords(HoodieTableMetaClient metadataMetaClien private void verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(HoodieTable table, boolean enableMetaFields) throws IOException { table.getHoodieView().sync(); List fileSlices = table.getSliceView() - .getLatestFileSlices(MetadataPartitionType.FILES.getPartitionPath()).collect(Collectors.toList()); + .getLatestFileSlices(FILES.getPartitionPath()).collect(Collectors.toList()); if (!fileSlices.get(0).getBaseFile().isPresent()) { throw new IllegalStateException("Base file not available!"); } @@ -810,15 +1069,15 @@ private void verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(HoodieTable tabl HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(context.getHadoopConf().get(), new Path(baseFile.getPath()), new CacheConfig(context.getHadoopConf().get())); - List> records = hoodieHFileReader.readAllRecords(); + List records = HoodieHFileReader.readAllRecords(hoodieHFileReader); records.forEach(entry -> { if (enableMetaFields) { - assertNotNull(((GenericRecord) entry.getSecond()).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNotNull(((GenericRecord) 
entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); } else { - assertNull(((GenericRecord) entry.getSecond()).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); } - final String keyInPayload = (String) ((GenericRecord) entry.getSecond()) + final String keyInPayload = (String) ((GenericRecord) entry) .get(HoodieMetadataPayload.KEY_FIELD_NAME); assertFalse(keyInPayload.isEmpty()); }); @@ -942,7 +1201,7 @@ public void testManualRollbacks(final boolean populateMateFields) throws Excepti } } - assertTrue(exceptionRaised, "Rollback of archived instants should fail"); + assertFalse(exceptionRaised, "Metadata table should not archive instants that are in dataset active timeline"); // Since each rollback also creates a deltacommit, we can only support rolling back of half of the original // instants present before rollback started. assertTrue(numRollbacks >= Math.max(minArchiveCommitsDataset, minArchiveCommitsMetadata) / 2, @@ -1125,6 +1384,139 @@ public void testTableOperationsWithRestore(HoodieTableType tableType) throws Exc testTableOperationsImpl(engineContext, writeConfig); } + @Test + public void testColStatsPrefixLookup() throws IOException { + this.tableType = COPY_ON_WRITE; + initPath(); + initSparkContexts("TestHoodieMetadata"); + initFileSystem(); + fs.mkdirs(new Path(basePath)); + initTimelineService(); + initMetaClient(tableType); + initTestDataGenerator(); + metadataTableBasePath = HoodieTableMetadata.getMetadataTableBasePath(basePath); + + HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc); + // disable small file handling so that every insert goes to a new file group. 
+ HoodieWriteConfig writeConfig = getWriteConfigBuilder(true, true, false) + .withRollbackUsingMarkers(false) + .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(0) + .withInlineCompaction(false).withMaxNumDeltaCommitsBeforeCompaction(1) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) + .withAutoClean(false).retainCommits(1).retainFileVersions(1).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .enable(true) + .withMetadataIndexColumnStats(true) + .enableFullScan(false) + .build()) + .build(); + + try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, writeConfig)) { + + String firstCommit = "0000001"; + List records = dataGen.generateInserts(firstCommit, 20); + + AtomicInteger counter = new AtomicInteger(); + List processedRecords = records.stream().map(entry -> + new HoodieAvroRecord(new HoodieKey("key1_" + counter.getAndIncrement(), entry.getPartitionPath()), (HoodieRecordPayload) entry.getData())) + .collect(Collectors.toList()); + + client.startCommitWithTime(firstCommit); + List writeStatuses = client.insert(jsc.parallelize(processedRecords, 1), firstCommit).collect(); + assertNoWriteErrors(writeStatuses); + + // Write 2 (inserts) + String secondCommit = "0000002"; + client.startCommitWithTime(secondCommit); + records = dataGen.generateInserts(secondCommit, 20); + AtomicInteger counter1 = new AtomicInteger(); + processedRecords = records.stream().map(entry -> + new HoodieAvroRecord(new HoodieKey("key2_" + counter1.getAndIncrement(), entry.getPartitionPath()), (HoodieRecordPayload) entry.getData())) + .collect(Collectors.toList()); + writeStatuses = client.insert(jsc.parallelize(processedRecords, 1), secondCommit).collect(); + assertNoWriteErrors(writeStatuses); + + Map>> commitToPartitionsToFiles = new HashMap<>(); + // populate commit -> partition -> file info to assist in validation and prefi + metaClient.getActiveTimeline().getInstants().forEach(entry -> { + try { 
+ HoodieCommitMetadata commitMetadata = HoodieCommitMetadata + .fromBytes(metaClient.getActiveTimeline().getInstantDetails(entry).get(), HoodieCommitMetadata.class); + String commitTime = entry.getTimestamp(); + if (!commitToPartitionsToFiles.containsKey(commitTime)) { + commitToPartitionsToFiles.put(commitTime, new HashMap<>()); + } + commitMetadata.getPartitionToWriteStats().entrySet() + .stream() + .forEach(partitionWriteStat -> { + String partitionStatName = partitionWriteStat.getKey(); + List writeStats = partitionWriteStat.getValue(); + String partition = HoodieTableMetadataUtil.getPartitionIdentifier(partitionStatName); + if (!commitToPartitionsToFiles.get(commitTime).containsKey(partition)) { + commitToPartitionsToFiles.get(commitTime).put(partition, new ArrayList<>()); + } + writeStats.forEach(writeStat -> commitToPartitionsToFiles.get(commitTime).get(partition).add(writeStat.getPath())); + }); + } catch (IOException e) { + e.printStackTrace(); + } + }); + + HoodieTableMetadata tableMetadata = metadata(client); + // prefix search for column (_hoodie_record_key) + ColumnIndexID columnIndexID = new ColumnIndexID(HoodieRecord.RECORD_KEY_METADATA_FIELD); + List> result = tableMetadata.getRecordsByKeyPrefixes(Collections.singletonList(columnIndexID.asBase64EncodedString()), + MetadataPartitionType.COLUMN_STATS.getPartitionPath()).collectAsList(); + + // there are 3 partitions in total and 2 commits. total entries should be 6. + assertEquals(result.size(), 6); + result.forEach(entry -> { + //LOG.warn("Prefix search entries just for record key col : " + entry.getRecordKey().toString() + " :: " + entry.getData().getColumnStatMetadata().get().toString()); + }); + + // prefix search for col(_hoodie_record_key) and first partition. 
only 2 files should be matched + PartitionIndexID partitionIndexID = new PartitionIndexID(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH); + result = tableMetadata.getRecordsByKeyPrefixes(Collections.singletonList(columnIndexID.asBase64EncodedString().concat(partitionIndexID.asBase64EncodedString())), + MetadataPartitionType.COLUMN_STATS.getPartitionPath()).collectAsList(); + // 1 partition and 2 commits. total entries should be 2. + assertEquals(result.size(), 2); + result.forEach(entry -> { + // LOG.warn("Prefix search entries for record key col and first partition : " + entry.getRecordKey().toString() + " :: " + entry.getData().getColumnStatMetadata().get().toString()); + HoodieMetadataColumnStats metadataColumnStats = entry.getData().getColumnStatMetadata().get(); + String fileName = metadataColumnStats.getFileName(); + if (fileName.contains(firstCommit)) { + assertTrue(commitToPartitionsToFiles.get(firstCommit).get(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) + .contains(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH + "/" + fileName)); + } else { + assertTrue(commitToPartitionsToFiles.get(secondCommit).get(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) + .contains(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH + "/" + fileName)); + } + }); + + // prefix search for column {commit time} and first partition + columnIndexID = new ColumnIndexID(HoodieRecord.COMMIT_TIME_METADATA_FIELD); + result = tableMetadata.getRecordsByKeyPrefixes(Collections.singletonList(columnIndexID.asBase64EncodedString().concat(partitionIndexID.asBase64EncodedString())), + MetadataPartitionType.COLUMN_STATS.getPartitionPath()).collectAsList(); + + // 1 partition and 2 commits. total entries should be 2. 
+ assertEquals(result.size(), 2); + result.forEach(entry -> { + // LOG.warn("Prefix search entries for record key col and first partition : " + entry.getRecordKey().toString() + " :: " + entry.getData().getColumnStatMetadata().get().toString()); + HoodieMetadataColumnStats metadataColumnStats = entry.getData().getColumnStatMetadata().get(); + // for commit time column, min max should be the same since we disable small files, every commit will create a new file + assertEquals(metadataColumnStats.getMinValue(), metadataColumnStats.getMaxValue()); + String fileName = metadataColumnStats.getFileName(); + if (fileName.contains(firstCommit)) { + assertTrue(commitToPartitionsToFiles.get(firstCommit).get(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) + .contains(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH + "/" + fileName)); + } else { + assertTrue(commitToPartitionsToFiles.get(secondCommit).get(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) + .contains(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH + "/" + fileName)); + } + }); + } + } + /** * Test all major table operations with the given table, config and context. 
* @@ -1203,7 +1595,7 @@ private void testTableOperationsImpl(HoodieSparkEngineContext engineContext, Hoo validateMetadata(client); // Restore - client.restoreToInstant("0000006"); + client.restoreToInstant("0000006", writeConfig.isMetadataTableEnabled()); validateMetadata(client); } } @@ -1218,8 +1610,8 @@ public void testMetadataMultiWriter() throws Exception { Properties properties = new Properties(); properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"1000"); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY,"20"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "1000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY, "20"); HoodieWriteConfig writeConfig = getWriteConfigBuilder(true, true, false) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).withAutoClean(false).build()) @@ -1282,7 +1674,7 @@ public void testMultiWriterForDoubleLocking() throws Exception { Properties properties = new Properties(); properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000"); HoodieWriteConfig writeConfig = getWriteConfigBuilder(true, true, false) .withCompactionConfig(HoodieCompactionConfig.newBuilder() @@ -1613,7 +2005,7 @@ public void testRollbackDuringUpgradeForDoubleLocking() throws IOException, Inte Properties properties = new Properties(); properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY, "3"); - 
properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000"); HoodieWriteConfig writeConfig = getWriteConfigBuilder(false, true, false) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).withAutoClean(false).build()) @@ -1722,6 +2114,60 @@ public void testRollbackOfPartiallyFailedCommitWithNewPartitions() throws Except } } + @Test + public void testDeletePartitions() throws Exception { + init(HoodieTableType.COPY_ON_WRITE); + + int maxCommits = 1; + HoodieWriteConfig cfg = getConfigBuilder(TRIP_EXAMPLE_SCHEMA, HoodieIndex.IndexType.BLOOM, HoodieFailedWritesCleaningPolicy.EAGER) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(maxCommits).build()) + .withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1).withDeleteParallelism(1) + .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) + .build(); + + try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) { + String newCommitTime = HoodieActiveTimeline.createNewInstantTime(); + client.startCommitWithTime(newCommitTime); + List records = dataGen.generateInserts(newCommitTime, 10); + List upsertRecords = new ArrayList<>(); + for (HoodieRecord entry : records) { + if (entry.getPartitionPath().equals(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) + || entry.getPartitionPath().equals(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)) { + upsertRecords.add(entry); + } + } + List writeStatuses = client.upsert(jsc.parallelize(upsertRecords, 1), newCommitTime).collect(); + assertNoWriteErrors(writeStatuses); + validateMetadata(client); + + // delete partitions + newCommitTime 
= HoodieActiveTimeline.createNewInstantTime(5000); + client.startCommitWithTime(newCommitTime); + client.deletePartitions(singletonList(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH), newCommitTime); + + // add 1 more commit + newCommitTime = HoodieActiveTimeline.createNewInstantTime(5000); + client.startCommitWithTime(newCommitTime); + records = dataGen.generateInserts(newCommitTime, 10); + upsertRecords = new ArrayList<>(); + for (HoodieRecord entry : records) { + if (entry.getPartitionPath().equals(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)) { + upsertRecords.add(entry); + } + } + writeStatuses = client.upsert(jsc.parallelize(upsertRecords, 1), newCommitTime).collect(); + assertNoWriteErrors(writeStatuses); + + // trigger clean which will actually trigger deletion of the partition + newCommitTime = HoodieActiveTimeline.createNewInstantTime(5000); + HoodieCleanMetadata cleanMetadata = client.clean(newCommitTime); + validateMetadata(client); + assertEquals(1, metadata(client).getAllPartitionPaths().size()); + } + } + /** * Test various error scenarios. 
*/ @@ -1810,7 +2256,7 @@ public void testMetadataMetrics() throws Exception { assertTrue(metricsRegistry.getAllCounts().containsKey(HoodieMetadataMetrics.INITIALIZE_STR + ".count")); assertTrue(metricsRegistry.getAllCounts().containsKey(HoodieMetadataMetrics.INITIALIZE_STR + ".totalDuration")); assertTrue(metricsRegistry.getAllCounts().get(HoodieMetadataMetrics.INITIALIZE_STR + ".count") >= 1L); - final String prefix = MetadataPartitionType.FILES.getPartitionPath() + "."; + final String prefix = FILES.getPartitionPath() + "."; assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_COUNT_BASE_FILES)); assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_COUNT_LOG_FILES)); assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_TOTAL_BASE_FILE_SIZE)); @@ -2021,11 +2467,57 @@ private void validateMetadata(SparkRDDWriteClient testClient) throws IOException assertTrue(latestSlices.size() <= (numFileVersions * metadataEnabledPartitionTypes.get(partition).getFileGroupCount()), "Should limit file slice to " + numFileVersions + " per file group, but was " + latestSlices.size()); + List logFiles = latestSlices.get(0).getLogFiles().collect(Collectors.toList()); + try { + if (FILES.getPartitionPath().equals(partition)) { + verifyMetadataRawRecords(table, logFiles, false); + } + if (COLUMN_STATS.getPartitionPath().equals(partition)) { + verifyMetadataColumnStatsRecords(logFiles); + } + } catch (IOException e) { + LOG.error("Metadata record validation failed", e); + fail("Metadata record validation failed"); + } }); LOG.info("Validation time=" + timer.endTimer()); } + private void verifyMetadataColumnStatsRecords(List logFiles) throws IOException { + for (HoodieLogFile logFile : logFiles) { + FileStatus[] fsStatus = fs.listStatus(logFile.getPath()); + MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath()); + if (writerSchemaMsg == 
null) { + // not a data block + continue; + } + + Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg); + try (HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema)) { + while (logFileReader.hasNext()) { + HoodieLogBlock logBlock = logFileReader.next(); + if (logBlock instanceof HoodieDataBlock) { + try (ClosableIterator recordItr = ((HoodieDataBlock) logBlock).getRecordIterator()) { + recordItr.forEachRemaining(indexRecord -> { + final GenericRecord record = (GenericRecord) indexRecord; + final GenericRecord colStatsRecord = (GenericRecord) record.get(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS); + assertNotNull(colStatsRecord); + assertNotNull(colStatsRecord.get(HoodieMetadataPayload.COLUMN_STATS_FIELD_COLUMN_NAME)); + assertNotNull(colStatsRecord.get(HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT)); + /** + * TODO: some types of field may have null min/max as these statistics are only supported for primitive types + * assertNotNull(colStatsRecord.get(HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE)); + * assertNotNull(colStatsRecord.get(HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE)); + */ + }); + } + } + } + } + } + } + /** * Returns the list of all files in the dataset by iterating over the metadata table. 
* diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java index 70f54b111980e..9a8fc55a20028 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java @@ -21,9 +21,9 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.util.Pair; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.common.config.HoodieMetadataConfig; @@ -51,8 +51,6 @@ import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; - -import org.apache.hadoop.fs.FileStatus; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroSchemaConverter; @@ -288,19 +286,19 @@ private void verifyMetadataRawRecords(HoodieTable table, List log } Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg); - HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema); - - while (logFileReader.hasNext()) { - HoodieLogBlock logBlock = logFileReader.next(); - if (logBlock instanceof HoodieDataBlock) { - try (ClosableIterator recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) { - recordItr.forEachRemaining(indexRecord -> { - final GenericRecord record = (GenericRecord) indexRecord; - assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); 
- assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)); - final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME)); - assertFalse(key.isEmpty()); - }); + try (HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema)) { + while (logFileReader.hasNext()) { + HoodieLogBlock logBlock = logFileReader.next(); + if (logBlock instanceof HoodieDataBlock) { + try (ClosableIterator recordItr = ((HoodieDataBlock) logBlock).getRecordIterator()) { + recordItr.forEachRemaining(indexRecord -> { + final GenericRecord record = (GenericRecord) indexRecord; + assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)); + final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME)); + assertFalse(key.isEmpty()); + }); + } } } } @@ -361,10 +359,10 @@ private void verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(HoodieTable tabl HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(context.getHadoopConf().get(), new Path(baseFile.getPath()), new CacheConfig(context.getHadoopConf().get())); - List> records = hoodieHFileReader.readAllRecords(); + List records = HoodieHFileReader.readAllRecords(hoodieHFileReader); records.forEach(entry -> { - assertNull(((GenericRecord) entry.getSecond()).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); - final String keyInPayload = (String) ((GenericRecord) entry.getSecond()) + assertNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + final String keyInPayload = (String) ((GenericRecord) entry) .get(HoodieMetadataPayload.KEY_FIELD_NAME); assertFalse(keyInPayload.isEmpty()); }); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java 
index ce0cc37c78e51..a6a37030e8a69 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java @@ -585,7 +585,7 @@ private void testUpsertsInternal(HoodieWriteConfig config, client.savepoint("004", "user1","comment1"); - client.restoreToInstant("004"); + client.restoreToInstant("004", config.isMetadataTableEnabled()); assertFalse(metaClient.reloadActiveTimeline().getRollbackTimeline().lastInstant().isPresent()); @@ -1399,6 +1399,41 @@ public void testSimpleClustering(boolean populateMetaFields, boolean preserveCom testInsertAndClustering(clusteringConfig, populateMetaFields, true, false, SqlQueryEqualityPreCommitValidator.class.getName(), COUNT_SQL_QUERY_FOR_VALIDATION, ""); } + @Test + public void testRolblackOfRegularCommitWithPendingReplaceCommitInTimeline() throws Exception { + HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder().withClusteringMaxNumGroups(10) + .withClusteringTargetPartitions(0).withInlineClusteringNumCommits(1).withInlineClustering(true) + .withPreserveHoodieCommitMetadata(true).build(); + // trigger clustering, but do not complete + testInsertAndClustering(clusteringConfig, true, false, false, SqlQueryEqualityPreCommitValidator.class.getName(), COUNT_SQL_QUERY_FOR_VALIDATION, ""); + + // trigger another partial commit, followed by valid commit. rollback of partial commit should succeed. 
+ HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder().withAutoCommit(false); + SparkRDDWriteClient client = getHoodieWriteClient(cfgBuilder.build()); + String commitTime1 = HoodieActiveTimeline.createNewInstantTime(); + List records1 = dataGen.generateInserts(commitTime1, 200); + client.startCommitWithTime(commitTime1); + JavaRDD insertRecordsRDD1 = jsc.parallelize(records1, 2); + JavaRDD statuses = client.upsert(insertRecordsRDD1, commitTime1); + List statusList = statuses.collect(); + assertNoWriteErrors(statusList); + + HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build(); + assertEquals(2, metaClient.getActiveTimeline().getCommitsTimeline().filterInflightsAndRequested().countInstants()); + + // trigger another commit. this should rollback latest partial commit. + records1 = dataGen.generateInserts(commitTime1, 200); + client.startCommitWithTime(commitTime1); + insertRecordsRDD1 = jsc.parallelize(records1, 2); + statuses = client.upsert(insertRecordsRDD1, commitTime1); + statusList = statuses.collect(); + assertNoWriteErrors(statusList); + client.commit(commitTime1, statuses); + metaClient.reloadActiveTimeline(); + // rollback should have succeeded. Essentially, the pending clustering should not hinder the rollback of regular commits. 
+ assertEquals(1, metaClient.getActiveTimeline().getCommitsTimeline().filterInflightsAndRequested().countInstants()); + } + @ParameterizedTest @ValueSource(booleans = {true, false}) public void testInlineScheduleClustering(boolean scheduleInlineClustering) throws IOException { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java index 8c27e488dc782..024cf1ff50acc 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java @@ -78,6 +78,10 @@ import scala.Tuple2; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists; +import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -198,6 +202,62 @@ public void testSimpleTagLocationAndUpdate(IndexType indexType, boolean populate recordLocations.foreach(entry -> assertEquals(recordKeyToPartitionPathMap.get(entry._1.getRecordKey()), entry._1.getPartitionPath(), "PartitionPath mismatch")); } + @Test + public void testLookupIndexWithOrWithoutColumnStats() throws Exception { + setUp(IndexType.BLOOM, true, true); + String newCommitTime = "001"; + int totalRecords = 10 + random.nextInt(20); + List records = dataGen.generateInserts(newCommitTime, totalRecords); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + 
metaClient = HoodieTableMetaClient.reload(metaClient); + HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient); + + // Test tagLocation without any entries in index + JavaRDD javaRDD = tagLocation(index, writeRecords, hoodieTable); + assert (javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size() == 0); + + // Insert totalRecords records + writeClient.startCommitWithTime(newCommitTime); + JavaRDD writeStatues = writeClient.upsert(writeRecords, newCommitTime); + Assertions.assertNoWriteErrors(writeStatues.collect()); + + // Now tagLocation for these records + javaRDD = tagLocation(index, writeRecords, hoodieTable); + assert (javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size() == 0); + // Now commit this & update location of records inserted + writeClient.commit(newCommitTime, writeStatues); + + // check column_stats partition exists + metaClient = HoodieTableMetaClient.reload(metaClient); + assertTrue(metadataPartitionExists(metaClient.getBasePath(), context, COLUMN_STATS)); + assertTrue(getCompletedMetadataPartitions(metaClient.getTableConfig()).contains(COLUMN_STATS.getPartitionPath())); + + // delete the column_stats partition + deleteMetadataPartition(metaClient.getBasePath(), context, COLUMN_STATS); + + // Now tagLocation for these records, they should be tagged correctly despite column_stats being enabled but not present + hoodieTable = HoodieSparkTable.create(config, context, metaClient); + javaRDD = tagLocation(index, writeRecords, hoodieTable); + Map recordKeyToPartitionPathMap = new HashMap(); + List hoodieRecords = writeRecords.collect(); + hoodieRecords.forEach(entry -> recordKeyToPartitionPathMap.put(entry.getRecordKey(), entry.getPartitionPath())); + + assertEquals(totalRecords, javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size()); + assertEquals(totalRecords, javaRDD.map(record -> record.getKey().getRecordKey()).distinct().count()); + assertEquals(totalRecords, 
javaRDD.filter(record -> (record.getCurrentLocation() != null + && record.getCurrentLocation().getInstantTime().equals(newCommitTime))).distinct().count()); + javaRDD.foreach(entry -> assertEquals(recordKeyToPartitionPathMap.get(entry.getRecordKey()), entry.getPartitionPath(), "PartitionPath mismatch")); + + JavaRDD hoodieKeyJavaRDD = writeRecords.map(entry -> entry.getKey()); + JavaPairRDD>> recordLocations = getRecordLocations(hoodieKeyJavaRDD, hoodieTable); + List hoodieKeys = hoodieKeyJavaRDD.collect(); + assertEquals(totalRecords, recordLocations.collect().size()); + assertEquals(totalRecords, recordLocations.map(record -> record._1).distinct().count()); + recordLocations.foreach(entry -> assertTrue(hoodieKeys.contains(entry._1), "Missing HoodieKey")); + recordLocations.foreach(entry -> assertEquals(recordKeyToPartitionPathMap.get(entry._1.getRecordKey()), entry._1.getPartitionPath(), "PartitionPath mismatch")); + } + @ParameterizedTest @MethodSource("indexTypeParams") public void testTagLocationAndDuplicateUpdate(IndexType indexType, boolean populateMetaFields, boolean enableMetadataIndex) throws Exception { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java index f00a0b8d19158..2e387be54452a 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.model.HoodieCleaningPolicy; +import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieTableType; 
import org.apache.hudi.common.model.WriteConcurrencyMode; @@ -94,6 +95,10 @@ public void init(HoodieTableType tableType, boolean enableMetadataTable) throws init(tableType, enableMetadataTable, true, false, false); } + public void init(HoodieTableType tableType, boolean enableMetadataTable, boolean enableColumnStats) throws IOException { + init(tableType, enableMetadataTable, true, false, false); + } + public void init(HoodieTableType tableType, boolean enableMetadataTable, boolean enableFullScan, boolean enableMetrics, boolean validateMetadataPayloadStateConsistency) throws IOException { init(tableType, Option.empty(), enableMetadataTable, enableFullScan, enableMetrics, @@ -176,6 +181,10 @@ protected void doWriteOperation(HoodieTestTable testTable, String commitTime, Wr testTable.doWriteOperation(commitTime, operationType, emptyList(), asList("p1", "p2"), 3); } + protected HoodieCommitMetadata doWriteOperationWithMeta(HoodieTestTable testTable, String commitTime, WriteOperationType operationType) throws Exception { + return testTable.doWriteOperation(commitTime, operationType, emptyList(), asList("p1", "p2"), 3); + } + protected void doClean(HoodieTestTable testTable, String commitTime, List commitsToClean) throws IOException { doCleanInternal(testTable, commitTime, commitsToClean, false); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestBulkInsertInternalPartitioner.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestBulkInsertInternalPartitioner.java index 712f40568aae2..4d2f5e0c5e229 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestBulkInsertInternalPartitioner.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestBulkInsertInternalPartitioner.java @@ -60,6 +60,12 @@ public static JavaRDD generateTestRecordsForBulkInsert(JavaSparkCo return jsc.parallelize(records1, 
1).union(jsc.parallelize(records2, 1)); } + public static JavaRDD generateTestRecordsForBulkInsert(JavaSparkContext jsc, int count) { + HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(); + List records = dataGenerator.generateInserts("0", count); + return jsc.parallelize(records, 1); + } + public static Map generateExpectedPartitionNumRecords(JavaRDD records) { return records.map(record -> record.getPartitionPath()).countByValue(); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java index aafc538213738..445780384f97a 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java @@ -20,11 +20,14 @@ import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.client.HoodieTimelineArchiver; +import org.apache.hudi.client.transaction.lock.InProcessLockProvider; import org.apache.hudi.client.utils.MetadataConversionUtils; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.HoodieLogFormat; @@ -42,6 +45,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieCompactionConfig; +import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; import 
org.apache.hudi.exception.HoodieException; import org.apache.hudi.metadata.HoodieTableMetadata; @@ -71,6 +75,12 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -131,7 +141,8 @@ private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, int maxDeltaCommitsMetadataTable, HoodieTableType tableType) throws Exception { return initTestTableAndGetWriteConfig(enableMetadata, minArchivalCommits, maxArchivalCommits, - maxDeltaCommits, maxDeltaCommitsMetadataTable, tableType, false, 10, 209715200); + maxDeltaCommits, maxDeltaCommitsMetadataTable, tableType, false, 10, 209715200, + HoodieFailedWritesCleaningPolicy.EAGER, WriteConcurrencyMode.SINGLE_WRITER); } private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, @@ -140,7 +151,8 @@ private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, int maxDeltaCommitsMetadataTable, HoodieTableType tableType) throws Exception { return initTestTableAndGetWriteConfig(enableMetadata, minArchivalCommits, maxArchivalCommits, - 5, maxDeltaCommitsMetadataTable, tableType, false, 10, 209715200); + 5, maxDeltaCommitsMetadataTable, tableType, false, 10, 209715200, + HoodieFailedWritesCleaningPolicy.EAGER, WriteConcurrencyMode.SINGLE_WRITER); } private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, @@ -151,7 +163,8 @@ private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, int archiveFilesBatch, long size) throws Exception { return initTestTableAndGetWriteConfig(enableMetadata, minArchivalCommits, maxArchivalCommits, 5, - 
maxDeltaCommitsMetadataTable, HoodieTableType.COPY_ON_WRITE, enableArchiveMerge, archiveFilesBatch, size); + maxDeltaCommitsMetadataTable, HoodieTableType.COPY_ON_WRITE, enableArchiveMerge, archiveFilesBatch, size, + HoodieFailedWritesCleaningPolicy.EAGER, WriteConcurrencyMode.SINGLE_WRITER); } private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, @@ -162,7 +175,9 @@ private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, HoodieTableType tableType, boolean enableArchiveMerge, int archiveFilesBatch, - long size) throws Exception { + long size, + HoodieFailedWritesCleaningPolicy failedWritesCleaningPolicy, + WriteConcurrencyMode writeConcurrencyMode) throws Exception { init(tableType); HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath(basePath) .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) @@ -171,11 +186,15 @@ private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata, .withArchiveMergeEnable(enableArchiveMerge) .withArchiveMergeFilesBatchSize(archiveFilesBatch) .withArchiveMergeSmallFileLimit(size) + .withFailedWritesCleaningPolicy(failedWritesCleaningPolicy) .build()) .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withRemoteServerPort(timelineServicePort).build()) .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadata) .withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsMetadataTable).build()) + .withWriteConcurrencyMode(writeConcurrencyMode) + .withLockConfig(HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class) + .build()) .forTable("test-trip-table").build(); initWriteConfigAndMetatableWriter(writeConfig, enableMetadata); return writeConfig; @@ -404,6 +423,79 @@ public void testLoadArchiveTimelineWithDamagedPlanFile(boolean enableArchiveMerg assertThrows(HoodieException.class, () -> metaClient.getArchivedTimeline().reload()); } + @ParameterizedTest + 
@ValueSource(booleans = {true, false}) + public void testArchivalWithMultiWriters(boolean enableMetadata) throws Exception { + HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 4, 5, 2, + HoodieTableType.COPY_ON_WRITE, false, 10, 209715200, + HoodieFailedWritesCleaningPolicy.LAZY, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL); + + final ExecutorService executors = Executors.newFixedThreadPool(2); + List> completableFutureList = new ArrayList<>(); + CountDownLatch countDownLatch = new CountDownLatch(1); + IntStream.range(0, 2).forEach(index -> { + completableFutureList.add(CompletableFuture.supplyAsync(() -> { + HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); + try { + // wait until 4 commits are available so that archival thread will have something to archive. + countDownLatch.await(30, TimeUnit.SECONDS); + } catch (InterruptedException e) { + throw new HoodieException("Should not have thrown InterruptedException ", e); + } + metaClient.reloadActiveTimeline(); + while (!metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant().get().getTimestamp().endsWith("29") + || metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants() > 4) { + try { + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table); + archiver.archiveIfRequired(context, true); + // if not for below sleep, both archiving threads acquires lock in quick succession and does not give space for main thread + // to complete the write operation when metadata table is enabled. 
+ if (enableMetadata) { + Thread.sleep(2); + } + } catch (IOException e) { + throw new HoodieException("IOException thrown while archiving ", e); + } catch (InterruptedException e) { + throw new HoodieException("Should not have thrown InterruptedException ", e); + } + table.getMetaClient().reloadActiveTimeline(); + } + return true; + }, executors)); + }); + + // do ingestion and trigger archive actions here. + for (int i = 1; i < 30; i++) { + testTable.doWriteOperation("0000000" + String.format("%02d", i), WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2); + if (i == 5) { + // start up archival threads only after 4 commits. + countDownLatch.countDown(); + } + } + + try { + CompletableFuture completableFuture = allOfTerminateOnFailure(completableFutureList); + completableFuture.get(); + } finally { + executors.shutdownNow(); + } + } + + public static CompletableFuture allOfTerminateOnFailure(List> futures) { + CompletableFuture failure = new CompletableFuture(); + AtomicBoolean jobFailed = new AtomicBoolean(false); + for (CompletableFuture f : futures) { + f.exceptionally(ex -> { + if (!jobFailed.getAndSet(true)) { + LOG.warn("One of the job failed. Cancelling all other futures. 
" + ex.getCause() + ", " + ex.getMessage()); + futures.forEach(future -> future.cancel(true)); + } + return null; + }); + } + return CompletableFuture.anyOf(failure, CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))); + } + @ParameterizedTest @ValueSource(booleans = {true, false}) public void testLoadArchiveTimelineWithUncompletedMergeArchiveFile(boolean enableArchiveMerge) throws Exception { @@ -1054,7 +1146,7 @@ public void testArchivalAndCompactionInMetadataTable() throws Exception { .setBasePath(HoodieTableMetadata.getMetadataTableBasePath(basePath)) .setLoadActiveTimelineOnLoad(true).build(); - for (int i = 1; i <= 16; i++) { + for (int i = 1; i <= 17; i++) { testTable.doWriteOperation("000000" + String.format("%02d", i), WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2); // archival @@ -1075,6 +1167,30 @@ public void testArchivalAndCompactionInMetadataTable() throws Exception { IntStream.range(1, i + 1).forEach(j -> assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "0000000" + j)))); + } else if (i == 8) { + // i == 8 + // The instant "00000000000000" was archived since it's less than + // the earliest instant on the dataset active timeline, + // the dataset active timeline has instants of range [00000001 ~ 00000008] + // because when it does the archiving, no compaction instant on the + // metadata active timeline exists yet. 
+ assertEquals(9, metadataTableInstants.size()); + assertTrue(metadataTableInstants.contains( + new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "00000007001"))); + IntStream.range(1, i + 1).forEach(j -> + assertTrue(metadataTableInstants.contains( + new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "0000000" + j)))); + } else if (i <= 11) { + // In the metadata table timeline, the first delta commit is "00000007" + // because it equals with the earliest commit on the dataset timeline, after archival, + // delta commits "00000008" till "00000011" are added later on without archival or compaction + assertEquals(i - 5, metadataTableInstants.size()); + assertTrue(metadataTableInstants.contains( + new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "00000007001"))); + IntStream.range(7, i + 1).forEach(j -> + assertTrue(metadataTableInstants.contains( + new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, + "000000" + String.format("%02d", j))))); } else if (i <= 14) { // In the metadata table timeline, the first delta commit is "00000007001" // from metadata table compaction, after archival, delta commits "00000008" @@ -1095,14 +1211,27 @@ public void testArchivalAndCompactionInMetadataTable() throws Exception { assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "000000" + String.format("%02d", j))))); - } else { + } else if (i == 16) { // i == 16 - // Only commit "00000015001" and delta commit "00000016" are in the active timeline - assertEquals(2, metadataTableInstants.size()); + // dataset timeline has commits "00000015" and "00000016", + // the metadata timeline has commits [00000008, 00000016] and "00000015001" + assertEquals(10, metadataTableInstants.size()); assertTrue(metadataTableInstants.contains( new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "00000015001"))); + IntStream.range(8, 17).forEach(j -> + 
assertTrue(metadataTableInstants.contains( + new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, + "000000" + String.format("%02d", j))))); + } else { + // i == 17 + // Only commits [00000015, 00000017] and "00000015001" are on the metadata timeline + assertEquals(4, metadataTableInstants.size()); assertTrue(metadataTableInstants.contains( - new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "00000016"))); + new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "00000015001"))); + IntStream.range(15, 18).forEach(j -> + assertTrue(metadataTableInstants.contains( + new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, + "000000" + String.format("%02d", j))))); } } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java index 56cfe959bbabf..7e774c32c09f0 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java @@ -75,6 +75,7 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.SparkHoodieIndexFactory; +import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.action.clean.CleanPlanner; @@ -627,19 +628,24 @@ private void testFailedInsertAndCleanByCommits( * @param config HoodieWriteConfig */ protected List runCleaner(HoodieWriteConfig config) throws IOException { - return runCleaner(config, false, 1, false); + return runCleaner(config, false, false, 1, false); } protected List runCleanerWithInstantFormat(HoodieWriteConfig config, boolean needInstantInHudiFormat) throws IOException { - return runCleaner(config, false, 1, 
needInstantInHudiFormat); + return runCleaner(config, false, false, 1, needInstantInHudiFormat); } protected List runCleaner(HoodieWriteConfig config, int firstCommitSequence, boolean needInstantInHudiFormat) throws IOException { - return runCleaner(config, false, firstCommitSequence, needInstantInHudiFormat); + return runCleaner(config, false, false, firstCommitSequence, needInstantInHudiFormat); } protected List runCleaner(HoodieWriteConfig config, boolean simulateRetryFailure) throws IOException { - return runCleaner(config, simulateRetryFailure, 1, false); + return runCleaner(config, simulateRetryFailure, false, 1, false); + } + + protected List runCleaner( + HoodieWriteConfig config, boolean simulateRetryFailure, boolean simulateMetadataFailure) throws IOException { + return runCleaner(config, simulateRetryFailure, simulateMetadataFailure, 1, false); } /** @@ -647,7 +653,9 @@ protected List runCleaner(HoodieWriteConfig config, boolean sim * * @param config HoodieWriteConfig */ - protected List runCleaner(HoodieWriteConfig config, boolean simulateRetryFailure, Integer firstCommitSequence, boolean needInstantInHudiFormat) throws IOException { + protected List runCleaner( + HoodieWriteConfig config, boolean simulateRetryFailure, boolean simulateMetadataFailure, + Integer firstCommitSequence, boolean needInstantInHudiFormat) throws IOException { SparkRDDWriteClient writeClient = getHoodieWriteClient(config); String cleanInstantTs = needInstantInHudiFormat ? 
makeNewCommitTime(firstCommitSequence, "%014d") : makeNewCommitTime(firstCommitSequence, "%09d"); HoodieCleanMetadata cleanMetadata1 = writeClient.clean(cleanInstantTs); @@ -670,6 +678,17 @@ protected List runCleaner(HoodieWriteConfig config, boolean sim }); }); metaClient.reloadActiveTimeline().revertToInflight(completedCleanInstant); + + if (config.isMetadataTableEnabled() && simulateMetadataFailure) { + // Simulate the failure of corresponding instant in the metadata table + HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder() + .setBasePath(HoodieTableMetadata.getMetadataTableBasePath(metaClient.getBasePath())) + .setConf(metaClient.getHadoopConf()) + .build(); + HoodieInstant deltaCommit = new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, cleanInstantTs); + metadataMetaClient.reloadActiveTimeline().revertToInflight(deltaCommit); + } + // retry clean operation again writeClient.clean(); final HoodieCleanMetadata retriedCleanMetadata = CleanerUtils.getCleanerMetadata(HoodieTableMetaClient.reload(metaClient), completedCleanInstant); @@ -1215,12 +1234,80 @@ public void testCleanPreviousCorruptedCleanFiles() throws IOException { assertEquals(0, cleanStats.size(), "Must not clean any files"); } + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRerunFailedClean(boolean simulateMetadataFailure) throws Exception { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withMaxNumDeltaCommitsBeforeCompaction(1) + .withAssumeDatePartitioning(true).build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()) + .build(); + + HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context); + HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter); + 
String p0 = "2020/01/01"; + String p1 = "2020/01/02"; + + // make 1 commit, with 1 file per partition + String file1P0C0 = UUID.randomUUID().toString(); + String file1P1C0 = UUID.randomUUID().toString(); + testTable.addInflightCommit("00000000000001").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0); + + HoodieCommitMetadata commitMetadata = generateCommitMetadata("00000000000001", + Collections.unmodifiableMap(new HashMap>() { + { + put(p0, CollectionUtils.createImmutableList(file1P0C0)); + put(p1, CollectionUtils.createImmutableList(file1P1C0)); + } + }) + ); + metadataWriter.update(commitMetadata, "00000000000001", false); + metaClient.getActiveTimeline().saveAsComplete( + new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000001"), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + + metaClient = HoodieTableMetaClient.reload(metaClient); + + // make next replacecommit, with 1 clustering operation. logically delete p0. No change to p1 + // notice that clustering generates empty inflight commit files + Map partitionAndFileId002 = testTable.forReplaceCommit("00000000000002").getFileIdsWithBaseFilesInPartitions(p0); + String file2P0C1 = partitionAndFileId002.get(p0); + Pair replaceMetadata = + generateReplaceCommitMetadata("00000000000002", p0, file1P0C0, file2P0C1); + testTable.addReplaceCommit("00000000000002", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue()); + + // make next replacecommit, with 1 clustering operation. Replace data in p1. 
No change to p0 + // notice that clustering generates empty inflight commit files + Map partitionAndFileId003 = testTable.forReplaceCommit("00000000000003").getFileIdsWithBaseFilesInPartitions(p1); + String file3P1C2 = partitionAndFileId003.get(p1); + replaceMetadata = generateReplaceCommitMetadata("00000000000003", p1, file1P1C0, file3P1C2); + testTable.addReplaceCommit("00000000000003", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue()); + + // make next replacecommit, with 1 clustering operation. Replace data in p0 again + // notice that clustering generates empty inflight commit files + Map partitionAndFileId004 = testTable.forReplaceCommit("00000000000004").getFileIdsWithBaseFilesInPartitions(p0); + String file4P0C3 = partitionAndFileId004.get(p0); + replaceMetadata = generateReplaceCommitMetadata("00000000000004", p0, file2P0C1, file4P0C3); + testTable.addReplaceCommit("00000000000004", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue()); + + // run cleaner with failures + List hoodieCleanStats = runCleaner(config, true, simulateMetadataFailure, 5, true); + assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3)); + assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1)); + assertTrue(testTable.baseFileExists(p1, "00000000000003", file3P1C2)); + assertFalse(testTable.baseFileExists(p0, "00000000000001", file1P0C0)); + //file1P1C0 still stays because its not replaced until 3 and its the only version available + assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0)); + } + /** * Test Helper for cleaning failed writes by versions logic from HoodieWriteClient API perspective. * - * @param insertFn Insert API to be tested + * @param insertFn Insert API to be tested * @param isPreppedAPI Flag to indicate if a prepped-version is used. 
If true, a wrapper function will be used during - * record generation to also tag the regards (de-dupe is implicit as we use unique record-gen APIs) + * record generation to also tag the regards (de-dupe is implicit as we use unique record-gen APIs) * @throws Exception in case of errors */ private void testInsertAndCleanFailedWritesByVersions( @@ -1422,6 +1509,7 @@ private Stream> convertPathToFileIdWithCommitTime(final Hoo protected static HoodieCommitMetadata generateCommitMetadata( String instantTime, Map> partitionToFilePaths) { HoodieCommitMetadata metadata = new HoodieCommitMetadata(); + metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, HoodieTestTable.PHONY_TABLE_SCHEMA); partitionToFilePaths.forEach((partitionPath, fileList) -> fileList.forEach(f -> { HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setPartitionPath(partitionPath); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java index 0b29cf25f9e3f..8114daa30f763 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java @@ -25,10 +25,13 @@ import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import 
org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.RawTripTestPayload; import org.apache.hudi.common.testutils.Transformations; @@ -87,6 +90,7 @@ import static org.apache.hudi.execution.bulkinsert.TestBulkInsertInternalPartitioner.generateExpectedPartitionNumRecords; import static org.apache.hudi.execution.bulkinsert.TestBulkInsertInternalPartitioner.generateTestRecordsForBulkInsert; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -437,7 +441,7 @@ public void testFileSizeUpsertRecords() throws Exception { counts++; } } - assertEquals(3, counts, "If the number of records are more than 1150, then there should be a new file"); + assertEquals(5, counts, "If the number of records are more than 1150, then there should be a new file"); } @Test @@ -498,4 +502,52 @@ public void testBulkInsertRecords(String bulkInsertMode) throws Exception { public void testBulkInsertRecordsWithGlobalSort(String bulkInsertMode) throws Exception { testBulkInsertRecords(bulkInsertMode); } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testPartitionMetafileFormat(boolean partitionMetafileUseBaseFormat) throws Exception { + // By default there is no format specified for partition metafile + HoodieWriteConfig config = HoodieWriteConfig.newBuilder() + .withPath(basePath).withSchema(TRIP_EXAMPLE_SCHEMA).build(); + HoodieSparkCopyOnWriteTable table = (HoodieSparkCopyOnWriteTable) HoodieSparkTable.create(config, context, metaClient); + assertFalse(table.getPartitionMetafileFormat().isPresent()); + + if (partitionMetafileUseBaseFormat) { + // Add the setting to use datafile format + Properties properties = new Properties(); + properties.setProperty(HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.key(), "true"); + 
initMetaClient(HoodieTableType.COPY_ON_WRITE, properties); + metaClient = HoodieTableMetaClient.reload(metaClient); + assertTrue(metaClient.getTableConfig().getPartitionMetafileFormat().isPresent()); + table = (HoodieSparkCopyOnWriteTable) HoodieSparkTable.create(config, context, metaClient); + assertTrue(table.getPartitionMetafileFormat().isPresent()); + } + + String instantTime = makeNewCommitTime(); + SparkRDDWriteClient writeClient = getHoodieWriteClient(config); + writeClient.startCommitWithTime(instantTime); + + // Insert new records + final JavaRDD inputRecords = generateTestRecordsForBulkInsert(jsc, 10); + writeClient.bulkInsert(inputRecords, instantTime); + + // Partition metafile should be created + Path partitionPath = new Path(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH); + assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)); + Option metafilePath = HoodiePartitionMetadata.getPartitionMetafilePath(fs, partitionPath); + if (partitionMetafileUseBaseFormat) { + // Extension should be the same as the data file format of the table + assertTrue(metafilePath.get().toString().endsWith(table.getBaseFileFormat().getFileExtension())); + } else { + // No extension as it is in properties file format + assertTrue(metafilePath.get().toString().endsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX)); + } + + // Validate contents of the partition metafile + HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, partitionPath); + partitionMetadata.readFromFS(); + assertTrue(partitionMetadata.getPartitionDepth() == 3); + assertTrue(partitionMetadata.readPartitionCreatedCommitTime().get().equals(instantTime)); + } + } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java index 310ff4fe8aede..7f1046ba90ce4 100644 --- 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java @@ -119,7 +119,7 @@ public void testSuccessfulCompactionBasedOnTime() throws Exception { @Test public void testSuccessfulCompactionBasedOnNumOrTime() throws Exception { // Given: make three commits - HoodieWriteConfig cfg = getConfigForInlineCompaction(3, 20, CompactionTriggerStrategy.NUM_OR_TIME); + HoodieWriteConfig cfg = getConfigForInlineCompaction(3, 60, CompactionTriggerStrategy.NUM_OR_TIME); try (SparkRDDWriteClient writeClient = getHoodieWriteClient(cfg)) { List records = dataGen.generateInserts(HoodieActiveTimeline.createNewInstantTime(), 10); HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath()); @@ -134,7 +134,7 @@ public void testSuccessfulCompactionBasedOnNumOrTime() throws Exception { assertEquals(4, metaClient.getActiveTimeline().getWriteTimeline().countInstants()); // 4th commit, that will trigger compaction because reach the time elapsed metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build(); - finalInstant = HoodieActiveTimeline.createNewInstantTime(20000); + finalInstant = HoodieActiveTimeline.createNewInstantTime(60000); createNextDeltaCommit(finalInstant, dataGen.generateUpdates(finalInstant, 10), writeClient, metaClient, cfg, false); metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestCopyOnWriteRollbackActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestCopyOnWriteRollbackActionExecutor.java index 1bee6ac0ac622..e5dd5b087aa23 100644 --- 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestCopyOnWriteRollbackActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestCopyOnWriteRollbackActionExecutor.java @@ -83,10 +83,10 @@ public void testCopyOnWriteRollbackActionExecutorForFileListingAsGenerateFile() HoodieWriteConfig writeConfig = getConfigBuilder().withRollbackUsingMarkers(false).build(); HoodieTable table = this.getHoodieTable(metaClient, writeConfig); HoodieInstant needRollBackInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "002"); - + String rollbackInstant = "003"; // execute CopyOnWriteRollbackActionExecutor with filelisting mode BaseRollbackPlanActionExecutor copyOnWriteRollbackPlanActionExecutor = - new BaseRollbackPlanActionExecutor(context, table.getConfig(), table, "003", needRollBackInstant, false, + new BaseRollbackPlanActionExecutor(context, table.getConfig(), table, rollbackInstant, needRollBackInstant, false, table.getConfig().shouldRollbackUsingMarkers()); HoodieRollbackPlan rollbackPlan = (HoodieRollbackPlan) copyOnWriteRollbackPlanActionExecutor.execute().get(); CopyOnWriteRollbackActionExecutor copyOnWriteRollbackActionExecutor = new CopyOnWriteRollbackActionExecutor(context, table.getConfig(), table, "003", needRollBackInstant, true, @@ -125,7 +125,9 @@ public void testCopyOnWriteRollbackActionExecutorForFileListingAsGenerateFile() assertTrue(testTable.commitExists("001")); assertTrue(testTable.baseFileExists(p1, "001", "id11")); assertTrue(testTable.baseFileExists(p2, "001", "id12")); - assertFalse(testTable.inflightCommitExists("002")); + // Note that executeRollback() does not delete inflight instant files + // The deletion is done in finishRollback() called by runRollback() + assertTrue(testTable.inflightCommitExists("002")); assertFalse(testTable.commitExists("002")); assertFalse(testTable.baseFileExists(p1, "002", "id21")); assertFalse(testTable.baseFileExists(p2, 
"002", "id22")); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java index c9e3fed871acf..d8ce6612a443a 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java @@ -45,7 +45,6 @@ import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.marker.WriteMarkersFactory; import org.apache.hudi.testutils.MetadataMergeWriteStatus; - import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -125,8 +124,8 @@ public void testMergeOnReadRollbackActionExecutor(boolean isUsingMarkers) throws for (Map.Entry entry : rollbackMetadata.entrySet()) { HoodieRollbackPartitionMetadata meta = entry.getValue(); - assertTrue(meta.getFailedDeleteFiles() == null || meta.getFailedDeleteFiles().size() == 0); - assertTrue(meta.getSuccessDeleteFiles() == null || meta.getSuccessDeleteFiles().size() == 0); + assertEquals(0, meta.getFailedDeleteFiles().size()); + assertEquals(0, meta.getSuccessDeleteFiles().size()); } //4. 
assert file group after rollback, and compare to the rollbackstat diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java index f44d67e83398b..90d0f8835dd14 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java @@ -42,6 +42,7 @@ import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.TestCleaner; + import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -65,9 +66,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.assertNull; /** * Tests covering different clean plan policies/strategies. 
@@ -93,11 +94,12 @@ public void testInvalidCleaningTriggerStrategy() { private static Stream argumentsForTestKeepLatestCommits() { return Stream.of( - Arguments.of(false, false, false), - Arguments.of(true, false, false), - Arguments.of(false, true, false), - Arguments.of(false, false, true) - ); + Arguments.of(false, false, false, false), + Arguments.of(true, false, false, false), + Arguments.of(true, true, false, false), + Arguments.of(false, false, true, false), + Arguments.of(false, false, false, true) + ); } /** @@ -105,17 +107,22 @@ private static Stream argumentsForTestKeepLatestCommits() { */ @ParameterizedTest @MethodSource("argumentsForTestKeepLatestCommits") - public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception { + public void testKeepLatestCommits( + boolean simulateFailureRetry, boolean simulateMetadataFailure, + boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withIncrementalCleaningMode(enableIncrementalClean) - .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) - .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean) - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) - .retainCommits(2) - .withMaxCommitsBeforeCleaning(2).build()) - .build(); + .withMetadataConfig( + HoodieMetadataConfig.newBuilder() + .withAssumeDatePartitioning(true) + .build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withIncrementalCleaningMode(enableIncrementalClean) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) + .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean) + 
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(2) + .withMaxCommitsBeforeCleaning(2).build()) + .build(); HoodieTestTable testTable = HoodieTestTable.of(metaClient); String p0 = "2020/01/01"; @@ -130,20 +137,21 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn testTable.addInflightCommit("00000000000001").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0); HoodieCommitMetadata commitMetadata = generateCommitMetadata("00000000000001", - Collections.unmodifiableMap(new HashMap>() { - { - put(p0, CollectionUtils.createImmutableList(file1P0C0)); - put(p1, CollectionUtils.createImmutableList(file1P1C0)); - } - }) - ); + Collections.unmodifiableMap(new HashMap>() { + { + put(p0, CollectionUtils.createImmutableList(file1P0C0)); + put(p1, CollectionUtils.createImmutableList(file1P1C0)); + } + }) + ); metaClient.getActiveTimeline().saveAsComplete( - new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000001"), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000001"), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); metaClient = HoodieTableMetaClient.reload(metaClient); - List hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry, 2, true); + List hoodieCleanStatsOne = + runCleaner(config, simulateFailureRetry, simulateMetadataFailure, 2, true); assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions and clean any files"); assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0)); assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0)); @@ -160,9 +168,10 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn } }); metaClient.getActiveTimeline().saveAsComplete( - new HoodieInstant(HoodieInstant.State.INFLIGHT, 
HoodieTimeline.COMMIT_ACTION, "00000000000003"), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); - List hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry, 4, true); + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000003"), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + List hoodieCleanStatsTwo = + runCleaner(config, simulateFailureRetry, simulateMetadataFailure, 4, true); assertEquals(0, hoodieCleanStatsTwo.size(), "Must not scan any partitions and clean any files"); assertTrue(testTable.baseFileExists(p0, "00000000000003", file2P0C1)); assertTrue(testTable.baseFileExists(p1, "00000000000003", file2P1C1)); @@ -171,40 +180,42 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn // make next commit, with 2 updates to existing files, and 1 insert String file3P0C2 = testTable.addInflightCommit("00000000000005") - .withBaseFilesInPartition(p0, file1P0C0) - .withBaseFilesInPartition(p0, file2P0C1) - .getFileIdsWithBaseFilesInPartitions(p0).get(p0); + .withBaseFilesInPartition(p0, file1P0C0) + .withBaseFilesInPartition(p0, file2P0C1) + .getFileIdsWithBaseFilesInPartitions(p0).get(p0); commitMetadata = generateCommitMetadata("00000000000003", - CollectionUtils.createImmutableMap( - p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file3P0C2))); + CollectionUtils.createImmutableMap( + p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file3P0C2))); metaClient.getActiveTimeline().saveAsComplete( - new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000005"), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000005"), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); - List hoodieCleanStatsThree = runCleaner(config, 
simulateFailureRetry, 6, true); + List hoodieCleanStatsThree = + runCleaner(config, simulateFailureRetry, simulateMetadataFailure, 6, true); assertEquals(0, hoodieCleanStatsThree.size(), - "Must not clean any file. We have to keep 1 version before the latest commit time to keep"); + "Must not clean any file. We have to keep 1 version before the latest commit time to keep"); assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0)); // make next commit, with 2 updates to existing files, and 1 insert String file4P0C3 = testTable.addInflightCommit("00000000000007") - .withBaseFilesInPartition(p0, file1P0C0) - .withBaseFilesInPartition(p0, file2P0C1) - .getFileIdsWithBaseFilesInPartitions(p0).get(p0); + .withBaseFilesInPartition(p0, file1P0C0) + .withBaseFilesInPartition(p0, file2P0C1) + .getFileIdsWithBaseFilesInPartitions(p0).get(p0); commitMetadata = generateCommitMetadata("00000000000004", - CollectionUtils.createImmutableMap( - p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file4P0C3))); + CollectionUtils.createImmutableMap( + p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file4P0C3))); metaClient.getActiveTimeline().saveAsComplete( - new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000007"), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000007"), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); - List hoodieCleanStatsFour = runCleaner(config, simulateFailureRetry, 8, true); + List hoodieCleanStatsFour = + runCleaner(config, simulateFailureRetry, simulateMetadataFailure, 8, true); // enableBootstrapSourceClean would delete the bootstrap base file as the same time HoodieCleanStat partitionCleanStat = getCleanStat(hoodieCleanStatsFour, p0); assertEquals(enableBootstrapSourceClean ? 
2 : 1, partitionCleanStat.getSuccessDeleteFiles().size() - + (partitionCleanStat.getSuccessDeleteBootstrapBaseFiles() == null ? 0 - : partitionCleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least one old file"); + + (partitionCleanStat.getSuccessDeleteBootstrapBaseFiles() == null ? 0 + : partitionCleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least one old file"); assertFalse(testTable.baseFileExists(p0, "00000000000001", file1P0C0)); assertTrue(testTable.baseFileExists(p0, "00000000000003", file1P0C0)); assertTrue(testTable.baseFileExists(p0, "00000000000005", file1P0C0)); @@ -220,19 +231,20 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn metaClient = HoodieTableMetaClient.reload(metaClient); String file5P0C4 = testTable.addInflightCommit("00000000000009") - .withBaseFilesInPartition(p0, file1P0C0) - .withBaseFilesInPartition(p0, file2P0C1) - .getFileIdsWithBaseFilesInPartitions(p0).get(p0); + .withBaseFilesInPartition(p0, file1P0C0) + .withBaseFilesInPartition(p0, file2P0C1) + .getFileIdsWithBaseFilesInPartitions(p0).get(p0); commitMetadata = generateCommitMetadata("00000000000009", CollectionUtils.createImmutableMap( - p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file5P0C4))); + p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file5P0C4))); metaClient.getActiveTimeline().saveAsComplete( - new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000009"), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000009"), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); - List hoodieCleanStatsFive = runCleaner(config, simulateFailureRetry, 10, true); + List hoodieCleanStatsFive = + runCleaner(config, simulateFailureRetry, simulateMetadataFailure, 10, true); assertEquals(0, 
hoodieCleanStatsFive.size(), "Must not clean any files since at least 2 commits are needed from last clean operation before " - + "clean can be scheduled again"); + + "clean can be scheduled again"); assertTrue(testTable.baseFileExists(p0, "00000000000003", file1P0C0)); assertTrue(testTable.baseFileExists(p0, "00000000000005", file1P0C0)); assertTrue(testTable.baseFileExists(p0, "00000000000003", file2P0C1)); @@ -243,13 +255,14 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn // No cleaning on partially written file, with no commit. testTable.forCommit("00000000000011").withBaseFilesInPartition(p0, file3P0C2); commitMetadata = generateCommitMetadata("00000000000011", CollectionUtils.createImmutableMap(p0, - CollectionUtils.createImmutableList(file3P0C2))); + CollectionUtils.createImmutableList(file3P0C2))); metaClient.getActiveTimeline().createNewInstant( - new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "00000000000011")); + new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "00000000000011")); metaClient.getActiveTimeline().transitionRequestedToInflight( - new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "00000000000011"), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); - List hoodieCleanStatsFive2 = runCleaner(config, simulateFailureRetry, 12, true); + new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "00000000000011"), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + List hoodieCleanStatsFive2 = + runCleaner(config, simulateFailureRetry, simulateMetadataFailure, 12, true); HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsFive2, p0); assertNull(cleanStat, "Must not clean any files"); assertTrue(testTable.baseFileExists(p0, "00000000000005", file3P0C2)); @@ -374,7 +387,13 @@ public void testKeepLatestFileVersionsMOR() throws Exception { 
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) + .withMetadataConfig( + HoodieMetadataConfig.newBuilder() + .withAssumeDatePartitioning(true) + // Column Stats Index is disabled, since these tests construct tables which are + // not valid (empty commit metadata, invalid parquet files) + .withMetadataIndexColumnStats(false) + .build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build()) .build(); @@ -412,7 +431,13 @@ public void testKeepLatestCommitsMOR() throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) + .withMetadataConfig( + HoodieMetadataConfig.newBuilder() + .withAssumeDatePartitioning(true) + // Column Stats Index is disabled, since these tests construct tables which are + // not valid (empty commit metadata, invalid parquet files) + .withMetadataIndexColumnStats(false) + .build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1).build()) .build(); @@ -454,15 +479,17 @@ public void testKeepLatestCommitsMOR() throws Exception { */ @ParameterizedTest @MethodSource("argumentsForTestKeepLatestCommits") - public void testKeepXHoursWithCleaning(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception { + public void testKeepXHoursWithCleaning( + boolean simulateFailureRetry, boolean simulateMetadataFailure, + boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - 
.withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withIncrementalCleaningMode(enableIncrementalClean) - .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) - .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean) - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).cleanerNumHoursRetained(2).build()) - .build(); + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withIncrementalCleaningMode(enableIncrementalClean) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) + .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean) + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).cleanerNumHoursRetained(2).build()) + .build(); HoodieTestTable testTable = HoodieTestTable.of(metaClient); String p0 = "2020/01/01"; @@ -488,12 +515,13 @@ public void testKeepXHoursWithCleaning(boolean simulateFailureRetry, boolean ena }) ); metaClient.getActiveTimeline().saveAsComplete( - new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, firstCommitTs), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, firstCommitTs), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); metaClient = HoodieTableMetaClient.reload(metaClient); - List hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry); + List hoodieCleanStatsOne = + runCleaner(config, simulateFailureRetry, simulateMetadataFailure); assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions and clean any files"); assertTrue(testTable.baseFileExists(p0, firstCommitTs, file1P0C0)); assertTrue(testTable.baseFileExists(p1, firstCommitTs, file1P1C0)); @@ -512,9 +540,10 @@ 
public void testKeepXHoursWithCleaning(boolean simulateFailureRetry, boolean ena } }); metaClient.getActiveTimeline().saveAsComplete( - new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, secondCommitTs), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); - List hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry); + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, secondCommitTs), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + List hoodieCleanStatsTwo = + runCleaner(config, simulateFailureRetry, simulateMetadataFailure); assertEquals(2, hoodieCleanStatsTwo.size(), "Should clean one file each from both the partitions"); assertTrue(testTable.baseFileExists(p0, secondCommitTs, file2P0C1)); assertTrue(testTable.baseFileExists(p1, secondCommitTs, file2P1C1)); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkCopyOnWriteTableArchiveWithReplace.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkCopyOnWriteTableArchiveWithReplace.java index acd7e835eedc4..b4d6aefa71fe6 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkCopyOnWriteTableArchiveWithReplace.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkCopyOnWriteTableArchiveWithReplace.java @@ -79,7 +79,7 @@ public void testDeletePartitionAndArchive(boolean metadataEnabled) throws IOExce client.startCommitWithTime(instantTime4, HoodieActiveTimeline.REPLACE_COMMIT_ACTION); client.deletePartitions(Arrays.asList(DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH), instantTime4); - // 2nd write batch; 4 commits for the 3rd partition; the 3rd commit to trigger archiving the replace commit + // 2nd write batch; 4 commits for the 4th partition; the 4th commit to trigger archiving the 
replace commit for (int i = 5; i < 9; i++) { String instantTime = HoodieActiveTimeline.createNewInstantTime(i * 1000); client.startCommitWithTime(instantTime); @@ -97,7 +97,7 @@ public void testDeletePartitionAndArchive(boolean metadataEnabled) throws IOExce // verify records final HoodieTimeline timeline2 = metaClient.getCommitTimeline().filterCompletedInstants(); assertEquals(5, countRecordsOptionallySince(jsc(), basePath(), sqlContext(), timeline2, Option.empty()), - "should only have the 4 records from the 3rd partition."); + "should only have the 5 records from the 3rd partition."); } } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java index f4f47d375b22d..3b30c5b767367 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java @@ -21,6 +21,9 @@ import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -43,12 +46,16 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; import java.nio.file.Paths; import java.util.Arrays; import java.util.List; 
import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; import static org.apache.hudi.config.HoodieWriteConfig.AUTO_COMMIT_ENABLE; @@ -56,6 +63,17 @@ @Tag("functional") public class TestHoodieSparkMergeOnReadTableCompaction extends SparkClientFunctionalTestHarness { + private static Stream writeLogTest() { + // enable metadata table, enable embedded time line server + Object[][] data = new Object[][] { + {true, true}, + {true, false}, + {false, true}, + {false, false} + }; + return Stream.of(data).map(Arguments::of); + } + private HoodieTestDataGenerator dataGen; private SparkRDDWriteClient client; private HoodieTableMetaClient metaClient; @@ -104,6 +122,44 @@ public void testWriteDuringCompaction() throws IOException { Assertions.assertEquals(300, readTableTotalRecordsNum()); } + @ParameterizedTest + @MethodSource("writeLogTest") + public void testWriteLogDuringCompaction(boolean enableMetadataTable, boolean enableTimelineServer) throws IOException { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder() + .forTable("test-trip-table") + .withPath(basePath()) + .withSchema(TRIP_EXAMPLE_SCHEMA) + .withParallelism(2, 2) + .withAutoCommit(true) + .withEmbeddedTimelineServerEnabled(enableTimelineServer) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withMaxNumDeltaCommitsBeforeCompaction(1).build()) + .withLayoutConfig(HoodieLayoutConfig.newBuilder() + .withLayoutType(HoodieStorageLayout.LayoutType.BUCKET.name()) + .withLayoutPartitioner(SparkBucketIndexPartitioner.class.getName()).build()) + .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build()).build(); + metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, config.getProps()); + client = getHoodieWriteClient(config); + + final 
List records = dataGen.generateInserts("001", 100); + JavaRDD writeRecords = jsc().parallelize(records, 2); + + // initialize 100 records + client.upsert(writeRecords, client.startCommit()); + // update 100 records + client.upsert(writeRecords, client.startCommit()); + // schedule compaction + client.scheduleCompaction(Option.empty()); + // delete 50 records + List toBeDeleted = records.stream().map(HoodieRecord::getKey).limit(50).collect(Collectors.toList()); + JavaRDD deleteRecords = jsc().parallelize(toBeDeleted, 2); + client.delete(deleteRecords, client.startCommit()); + // insert the same 100 records again + client.upsert(writeRecords, client.startCommit()); + Assertions.assertEquals(100, readTableTotalRecordsNum()); + } + private long readTableTotalRecordsNum() { return HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), Arrays.stream(dataGen.getPartitionPaths()).map(p -> Paths.get(basePath(), p).toString()).collect(Collectors.toList()), basePath()).size(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java index 7655cf93f104b..339e9e119ac09 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java @@ -150,7 +150,7 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro // NOTE: First writer will have Metadata table DISABLED HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false, rollbackUsingMarkers, HoodieIndex.IndexType.SIMPLE); - + addConfigsForPopulateMetaFields(cfgBuilder, true); HoodieWriteConfig cfg = cfgBuilder.build(); @@ -166,7 +166,7 @@ void testRollbackWithDeltaAndCompactionCommit(boolean 
rollbackUsingMarkers) thro /* * Write 1 (only inserts) */ - String newCommitTime = "001"; + String newCommitTime = "000000001"; client.startCommitWithTime(newCommitTime); List records = dataGen.generateInserts(newCommitTime, 200); @@ -183,7 +183,7 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro Option deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); assertTrue(deltaCommit.isPresent()); - assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001"); + assertEquals("000000001", deltaCommit.get().getTimestamp(), "Delta commit should be 000000001"); Option commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); assertFalse(commit.isPresent()); @@ -201,7 +201,7 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro /* * Write 2 (inserts + updates - testing failed delta commit) */ - final String commitTime1 = "002"; + final String commitTime1 = "000000002"; // WriteClient with custom config (disable small file handling) // NOTE: Second writer will have Metadata table ENABLED try (SparkRDDWriteClient secondClient = getHoodieWriteClient(getHoodieWriteConfigWithSmallFileHandlingOff(true));) { @@ -480,7 +480,7 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { copyOfRecords.clear(); // Rollback latest commit first - client.restoreToInstant("000"); + client.restoreToInstant("000", cfg.isMetadataTableEnabled()); metaClient = HoodieTableMetaClient.reload(metaClient); allFiles = listAllBaseFilesInPath(hoodieTable); @@ -530,7 +530,7 @@ void testMORTableRestore(boolean restoreAfterCompaction) throws Exception { if (!restoreAfterCompaction) { // restore to 002 and validate records. - client.restoreToInstant("002"); + client.restoreToInstant("002", cfg.isMetadataTableEnabled()); validateRecords(cfg, metaClient, updates1); } else { // trigger compaction and then trigger couple of upserts followed by restore. 
@@ -546,7 +546,7 @@ void testMORTableRestore(boolean restoreAfterCompaction) throws Exception { validateRecords(cfg, metaClient, updates5); // restore to 003 and validate records. - client.restoreToInstant("003"); + client.restoreToInstant("003", cfg.isMetadataTableEnabled()); validateRecords(cfg, metaClient, updates2); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestHarness.java index 71e4b4b4e6e3f..1b41769ecc6da 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestHarness.java @@ -17,6 +17,13 @@ package org.apache.hudi.testutils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hudi.avro.model.HoodieActionInstant; import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieCleanerPlan; @@ -25,6 +32,7 @@ import org.apache.hudi.client.SparkTaskContextSupplier; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.HoodieCleanStat; +import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.FileSlice; @@ -59,19 +67,12 @@ import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; +import org.apache.hudi.metadata.MetadataPartitionType; import 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.WorkloadStat; import org.apache.hudi.timeline.service.TimelineService; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; @@ -82,6 +83,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.TestInfo; +import scala.Tuple2; import java.io.IOException; import java.io.Serializable; @@ -91,14 +93,14 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Properties; import java.util.Random; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.function.Function; import java.util.stream.Collectors; -import scala.Tuple2; - import static org.apache.hudi.common.util.CleanerUtils.convertCleanMetadata; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -571,7 +573,7 @@ public void validateMetadata(HoodieTestTable testTable, List inflightCom } }); if (doFullValidation) { - runFullValidation(writeConfig, metadataTableBasePath, engineContext); + runFullValidation(table.getConfig().getMetadataConfig(), writeConfig, metadataTableBasePath, engineContext); } LOG.info("Validation time=" + timer.endTimer()); @@ -644,7 +646,10 @@ protected void validateFilesPerPartition(HoodieTestTable testTable, HoodieTableM assertEquals(metadataFilenames.size(), numFiles); } - private void 
runFullValidation(HoodieWriteConfig writeConfig, String metadataTableBasePath, HoodieSparkEngineContext engineContext) { + private void runFullValidation(HoodieMetadataConfig metadataConfig, + HoodieWriteConfig writeConfig, + String metadataTableBasePath, + HoodieSparkEngineContext engineContext) { HoodieBackedTableMetadataWriter metadataWriter = metadataWriter(writeConfig); assertNotNull(metadataWriter, "MetadataWriter should have been initialized"); @@ -666,16 +671,25 @@ private void runFullValidation(HoodieWriteConfig writeConfig, String metadataTab // in the .hoodie folder. List metadataTablePartitions = FSUtils.getAllPartitionPaths(engineContext, HoodieTableMetadata.getMetadataTableBasePath(basePath), false, false); - Assertions.assertEquals(metadataWriter.getEnabledPartitionTypes().size(), metadataTablePartitions.size()); + + List enabledPartitionTypes = metadataWriter.getEnabledPartitionTypes(); + + Assertions.assertEquals(enabledPartitionTypes.size(), metadataTablePartitions.size()); + + Map partitionTypeMap = enabledPartitionTypes.stream() + .collect(Collectors.toMap(MetadataPartitionType::getPartitionPath, Function.identity())); // Metadata table should automatically compact and clean // versions are +1 as autoClean / compaction happens end of commits int numFileVersions = metadataWriteConfig.getCleanerFileVersionsRetained() + 1; HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metadataMetaClient, metadataMetaClient.getActiveTimeline()); metadataTablePartitions.forEach(partition -> { + MetadataPartitionType partitionType = partitionTypeMap.get(partition); + List latestSlices = fsView.getLatestFileSlices(partition).collect(Collectors.toList()); - assertTrue(latestSlices.stream().map(FileSlice::getBaseFile).count() <= 1, "Should have a single latest base file"); - assertTrue(latestSlices.size() <= 1, "Should have a single latest file slice"); + + assertTrue(latestSlices.stream().map(FileSlice::getBaseFile).filter(Objects::nonNull).count() <= 
partitionType.getFileGroupCount(), "Should have a single latest base file"); + assertTrue(latestSlices.size() <= partitionType.getFileGroupCount(), "Should have a single latest file slice"); assertTrue(latestSlices.size() <= numFileVersions, "Should limit file slice to " + numFileVersions + " but was " + latestSlices.size()); }); @@ -687,7 +701,7 @@ public HoodieInstant createCleanMetadata(String instantTime, boolean inflightOnl public HoodieInstant createCleanMetadata(String instantTime, boolean inflightOnly, boolean isEmpty) throws IOException { HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new HoodieActionInstant("", "", ""), "", new HashMap<>(), - CleanPlanV2MigrationHandler.VERSION, new HashMap<>()); + CleanPlanV2MigrationHandler.VERSION, new HashMap<>(), new ArrayList<>()); if (inflightOnly) { HoodieTestTable.of(metaClient).addInflightClean(instantTime, cleanerPlan); } else { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index 05d7f99446e94..75d2d14221d32 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -38,6 +38,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.io.storage.HoodieHFileUtils; import org.apache.hudi.timeline.service.TimelineService; import org.apache.avro.Schema; @@ -66,6 +67,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.io.storage.HoodieHFileReader.SCHEMA_KEY; + /** * Utility methods to aid testing inside the HoodieClient module. 
*/ @@ -241,9 +244,10 @@ public static Stream readHFile(JavaSparkContext jsc, String[] pat Schema schema = null; for (String path : paths) { try { - HFile.Reader reader = HFile.createReader(fs, new Path(path), cacheConfig, fs.getConf()); + HFile.Reader reader = + HoodieHFileUtils.createHFileReader(fs, new Path(path), cacheConfig, fs.getConf()); if (schema == null) { - schema = new Schema.Parser().parse(new String(reader.loadFileInfo().get("schema".getBytes()))); + schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(SCHEMA_KEY.getBytes()))); } HFileScanner scanner = reader.getScanner(false, false); if (!scanner.seekTo()) { @@ -252,7 +256,7 @@ public static Stream readHFile(JavaSparkContext jsc, String[] pat } do { - Cell c = scanner.getKeyValue(); + Cell c = scanner.getCell(); byte[] value = Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength()); valuesAsList.add(HoodieAvroUtils.bytesToAvro(value, schema)); } while (scanner.next()); diff --git a/hudi-client/pom.xml b/hudi-client/pom.xml index 33c98144ccabb..5d9e267f3eefc 100644 --- a/hudi-client/pom.xml +++ b/hudi-client/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 1a558aeae3326..251889c17fcc4 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 @@ -90,6 +90,9 @@ ${basedir}/src/main/avro/HoodieClusteringPlan.avsc ${basedir}/src/main/avro/HoodieRequestedReplaceMetadata.avsc ${basedir}/src/main/avro/HoodieMetadata.avsc + ${basedir}/src/main/avro/HoodieIndexPartitionInfo.avsc + ${basedir}/src/main/avro/HoodieIndexPlan.avsc + ${basedir}/src/main/avro/HoodieIndexCommitMetadata.avsc ${basedir}/src/main/avro/HoodieArchivedMetaEntry.avsc @@ -114,6 +117,12 @@ avro + + + com.github.ben-manes.caffeine + caffeine + + org.apache.parquet @@ -221,14 +230,13 @@ org.apache.hbase 
hbase-client ${hbase.version} - test - + org.apache.hbase hbase-server ${hbase.version} - + compile @@ -257,5 +265,11 @@ 1.8.0 + + joda-time + joda-time + test + + diff --git a/hudi-common/src/main/avro/HoodieArchivedMetaEntry.avsc b/hudi-common/src/main/avro/HoodieArchivedMetaEntry.avsc index c052147f718ea..81bcaf745e5b8 100644 --- a/hudi-common/src/main/avro/HoodieArchivedMetaEntry.avsc +++ b/hudi-common/src/main/avro/HoodieArchivedMetaEntry.avsc @@ -120,6 +120,14 @@ "HoodieCommitMetadata" ], "default": null + }, + { + "name":"hoodieIndexCommitMetadata", + "type":[ + "null", + "HoodieIndexCommitMetadata" + ], + "default": null } ] } diff --git a/hudi-common/src/main/avro/HoodieCleanPartitionMetadata.avsc b/hudi-common/src/main/avro/HoodieCleanPartitionMetadata.avsc index 877b7259188f7..3cb096d48bd7a 100644 --- a/hudi-common/src/main/avro/HoodieCleanPartitionMetadata.avsc +++ b/hudi-common/src/main/avro/HoodieCleanPartitionMetadata.avsc @@ -24,6 +24,7 @@ {"name": "policy", "type": "string"}, {"name": "deletePathPatterns", "type": {"type": "array", "items": "string"}}, {"name": "successDeleteFiles", "type": {"type": "array", "items": "string"}}, - {"name": "failedDeleteFiles", "type": {"type": "array", "items": "string"}} + {"name": "failedDeleteFiles", "type": {"type": "array", "items": "string"}}, + {"name": "isPartitionDeleted", "type":["null", "boolean"], "default": null } ] } diff --git a/hudi-common/src/main/avro/HoodieCleanerPlan.avsc b/hudi-common/src/main/avro/HoodieCleanerPlan.avsc index c4481c2cd804c..e4c8638c86e6f 100644 --- a/hudi-common/src/main/avro/HoodieCleanerPlan.avsc +++ b/hudi-common/src/main/avro/HoodieCleanerPlan.avsc @@ -92,6 +92,14 @@ } }}], "default" : null + }, + { + "name": "partitionsToBeDeleted", + "doc": "partitions to be deleted", + "type":["null", + { "type":"array", "items":"string"} + ], + "default": null } ] } diff --git a/hudi-common/src/main/avro/HoodieIndexCommitMetadata.avsc 
b/hudi-common/src/main/avro/HoodieIndexCommitMetadata.avsc new file mode 100644 index 0000000000000..098a8c88e7328 --- /dev/null +++ b/hudi-common/src/main/avro/HoodieIndexCommitMetadata.avsc @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +{ + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "HoodieIndexCommitMetadata", + "fields": [ + { + "name": "version", + "doc": "This field replaces the field filesToBeDeletedPerPartition", + "type": [ + "int", + "null" + ], + "default": 1 + }, + { + "name": "operationType", + "doc": "This field replaces the field filesToBeDeletedPerPartition", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "indexPartitionInfos", + "doc": "This field contains the info for each partition that got indexed", + "type": [ + "null", + { + "type": "array", + "items": "HoodieIndexPartitionInfo" + } + ], + "default": null + } + ] +} diff --git a/hudi-common/src/main/avro/HoodieIndexPartitionInfo.avsc b/hudi-common/src/main/avro/HoodieIndexPartitionInfo.avsc new file mode 100644 index 0000000000000..52ed1e96aa3b3 --- /dev/null +++ b/hudi-common/src/main/avro/HoodieIndexPartitionInfo.avsc @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +{ + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "HoodieIndexPartitionInfo", + "fields": [ + { + "name": "version", + "type": [ + "int", + "null" + ], + "default": 1 + }, + { + "name": "metadataPartitionPath", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "indexUptoInstant", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} diff --git a/hudi-common/src/main/avro/HoodieIndexPlan.avsc b/hudi-common/src/main/avro/HoodieIndexPlan.avsc new file mode 100644 index 0000000000000..9fb7ec311e34a --- /dev/null +++ b/hudi-common/src/main/avro/HoodieIndexPlan.avsc @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +{ + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "HoodieIndexPlan", + "fields": [ + { + "name": "version", + "type": [ + "int", + "null" + ], + "default": 1 + }, + { + "name": "indexPartitionInfos", + "type": [ + "null", + { + "type": "array", + "items": "HoodieIndexPartitionInfo" + } + ], + "default": null + } + ] +} diff --git a/hudi-common/src/main/avro/HoodieMetadata.avsc b/hudi-common/src/main/avro/HoodieMetadata.avsc index a6807d58e48e0..a8d7ca72bd143 100644 --- a/hudi-common/src/main/avro/HoodieMetadata.avsc +++ b/hudi-common/src/main/avro/HoodieMetadata.avsc @@ -115,23 +115,202 @@ "type": [ "null", "string" - ] + ], + "default" : null }, { "doc": "Minimum value in the range. Based on user data table schema, we can convert this to appropriate type", "name": "minValue", "type": [ + // Those types should be aligned with Parquet `Statistics` impl + // making sure that we implement semantic consistent across file formats + // + // NOTE: Other logical types (decimal, date, timestamp, etc) will be converted + // into one of the following types, making sure that their corresponding + // ordering is preserved "null", - "string" - ] + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "BooleanWrapper", + "doc": "A record wrapping boolean type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": "boolean", + "name": "value" + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "IntWrapper", + "doc": "A record wrapping int type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": "int", + "name": "value" + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "LongWrapper", + "doc": "A record wrapping long type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": "long", + "name": "value" + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": 
"FloatWrapper", + "doc": "A record wrapping float type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": "float", + "name": "value" + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "DoubleWrapper", + "doc": "A record wrapping double type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": "double", + "name": "value" + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "BytesWrapper", + "doc": "A record wrapping bytes type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": "bytes", + "name": "value" + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "StringWrapper", + "doc": "A record wrapping string type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": "string", + "name": "value" + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "DateWrapper", + "doc": "A record wrapping Date logical type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": { + "type": "int" + // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't + // rely on logical types to do proper encoding of the native Java types, + // and hereby have to encode statistic manually + //"logicalType": "date" + }, + "name": "value" + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "DecimalWrapper", + "doc": "A record wrapping Decimal logical type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": { + "type": "bytes", + "logicalType": "decimal", + // NOTE: This is equivalent to Spark's [[DoubleDecimal]] and should + // be enough for almost any possible use-cases + "precision": 30, + "scale": 15 + }, + "name": "value" + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "TimeMicrosWrapper", + "doc": "A record wrapping 
Time-micros logical type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": { + "type": "long", + "logicalType": "time-micros" + }, + "name": "value" + + } + ] + }, + { + "namespace": "org.apache.hudi.avro.model", + "type": "record", + "name": "TimestampMicrosWrapper", + "doc": "A record wrapping Timestamp-micros logical type to be able to be used it w/in Avro's Union", + "fields": [ + { + "type": { + "type": "long" + // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't + // rely on logical types to do proper encoding of the native Java types, + // and hereby have to encode statistic manually + //"logicalType": "timestamp-micros" + }, + "name": "value" + } + ] + } + ], + "default": null }, { "doc": "Maximum value in the range. Based on user data table schema, we can convert it to appropriate type", "name": "maxValue", "type": [ + // Those types should be aligned with Parquet `Statistics` impl + // making sure that we implement semantic consistent across file formats + // + // NOTE: Other logical types (decimal, date, timestamp, etc) will be converted + // into one of the following types, making sure that their corresponding + // ordering is preserved "null", - "string" - ] + "org.apache.hudi.avro.model.BooleanWrapper", + "org.apache.hudi.avro.model.IntWrapper", + "org.apache.hudi.avro.model.LongWrapper", + "org.apache.hudi.avro.model.FloatWrapper", + "org.apache.hudi.avro.model.DoubleWrapper", + "org.apache.hudi.avro.model.BytesWrapper", + "org.apache.hudi.avro.model.StringWrapper", + "org.apache.hudi.avro.model.DateWrapper", + "org.apache.hudi.avro.model.DecimalWrapper", + "org.apache.hudi.avro.model.TimeMicrosWrapper", + "org.apache.hudi.avro.model.TimestampMicrosWrapper" + ], + "default": null }, { "doc": "Total count of values", diff --git a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java index 
8c88bfb001fc2..ec70653b9c124 100644 --- a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java @@ -69,7 +69,7 @@ public abstract class BaseHoodieTableFileIndex { private final String[] partitionColumns; private final FileSystemViewStorageConfig fileSystemStorageConfig; - private final HoodieMetadataConfig metadataConfig; + protected final HoodieMetadataConfig metadataConfig; private final HoodieTableQueryType queryType; private final Option specifiedQueryInstant; diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/ConvertingGenericData.java b/hudi-common/src/main/java/org/apache/hudi/avro/ConvertingGenericData.java new file mode 100644 index 0000000000000..9d36e214fb852 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/avro/ConvertingGenericData.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.avro; + +import org.apache.avro.Conversions; +import org.apache.avro.Schema; +import org.apache.avro.UnresolvedUnionException; +import org.apache.avro.data.TimeConversions; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericFixed; + +import java.util.Map; + +/** + * Custom instance of the {@link GenericData} model incorporating conversions from the + * common Avro logical types like "decimal", "uuid", "date", "time-micros", "timestamp-micros" + * + * NOTE: Given that this code has to be interoperable w/ Spark 2 (which relies on Avro 1.8.2) + * this model can't support newer conversion introduced in Avro 1.10 at the moment + */ +public class ConvertingGenericData extends GenericData { + + private static final Conversions.DecimalConversion DECIMAL_CONVERSION = new Conversions.DecimalConversion(); + private static final Conversions.UUIDConversion UUID_CONVERSION = new Conversions.UUIDConversion(); + private static final TimeConversions.DateConversion DATE_CONVERSION = new TimeConversions.DateConversion(); + private static final TimeConversions.TimeMicrosConversion TIME_MICROS_CONVERSION = new TimeConversions.TimeMicrosConversion(); + private static final TimeConversions.TimestampMicrosConversion TIMESTAMP_MICROS_CONVERSION = new TimeConversions.TimestampMicrosConversion(); + + // NOTE: Those are not supported in Avro 1.8.2 + // TODO re-enable upon upgrading to 1.10 + // private static final TimeConversions.TimestampMillisConversion TIMESTAMP_MILLIS_CONVERSION = new TimeConversions.TimestampMillisConversion(); + // private static final TimeConversions.TimeMillisConversion TIME_MILLIS_CONVERSION = new TimeConversions.TimeMillisConversion(); + // private static final TimeConversions.LocalTimestampMillisConversion LOCAL_TIMESTAMP_MILLIS_CONVERSION = new TimeConversions.LocalTimestampMillisConversion(); + // private static final TimeConversions.LocalTimestampMicrosConversion LOCAL_TIMESTAMP_MICROS_CONVERSION 
= new TimeConversions.LocalTimestampMicrosConversion(); + + public static final GenericData INSTANCE = new ConvertingGenericData(); + + private ConvertingGenericData() { + addLogicalTypeConversion(DECIMAL_CONVERSION); + addLogicalTypeConversion(UUID_CONVERSION); + addLogicalTypeConversion(DATE_CONVERSION); + addLogicalTypeConversion(TIME_MICROS_CONVERSION); + addLogicalTypeConversion(TIMESTAMP_MICROS_CONVERSION); + // NOTE: Those are not supported in Avro 1.8.2 + // TODO re-enable upon upgrading to 1.10 + // addLogicalTypeConversion(TIME_MILLIS_CONVERSION); + // addLogicalTypeConversion(TIMESTAMP_MILLIS_CONVERSION); + // addLogicalTypeConversion(LOCAL_TIMESTAMP_MILLIS_CONVERSION); + // addLogicalTypeConversion(LOCAL_TIMESTAMP_MICROS_CONVERSION); + } + + @Override + public boolean validate(Schema schema, Object datum) { + switch (schema.getType()) { + case RECORD: + if (!isRecord(datum)) { + return false; + } + for (Schema.Field f : schema.getFields()) { + if (!validate(f.schema(), getField(datum, f.name(), f.pos()))) { + return false; + } + } + return true; + case ENUM: + if (!isEnum(datum)) { + return false; + } + return schema.getEnumSymbols().contains(datum.toString()); + case ARRAY: + if (!(isArray(datum))) { + return false; + } + for (Object element : getArrayAsCollection(datum)) { + if (!validate(schema.getElementType(), element)) { + return false; + } + } + return true; + case MAP: + if (!(isMap(datum))) { + return false; + } + @SuppressWarnings(value = "unchecked") + Map map = (Map) datum; + for (Map.Entry entry : map.entrySet()) { + if (!validate(schema.getValueType(), entry.getValue())) { + return false; + } + } + return true; + case UNION: + try { + int i = resolveUnion(schema, datum); + return validate(schema.getTypes().get(i), datum); + } catch (UnresolvedUnionException e) { + return false; + } + case FIXED: + return (datum instanceof GenericFixed && ((GenericFixed) datum).bytes().length == schema.getFixedSize()) + || 
DECIMAL_CONVERSION.getConvertedType().isInstance(datum); + case STRING: + return isString(datum) + || UUID_CONVERSION.getConvertedType().isInstance(datum); + case BYTES: + return isBytes(datum) + || DECIMAL_CONVERSION.getConvertedType().isInstance(datum); + case INT: + return isInteger(datum) + || DATE_CONVERSION.getConvertedType().isInstance(datum); + case LONG: + return isLong(datum) + || TIME_MICROS_CONVERSION.getConvertedType().isInstance(datum) + || TIMESTAMP_MICROS_CONVERSION.getConvertedType().isInstance(datum); + case FLOAT: + return isFloat(datum); + case DOUBLE: + return isDouble(datum); + case BOOLEAN: + return isBoolean(datum); + case NULL: + return datum == null; + default: + return false; + } + } +} + diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 5cb18dc8d1509..1055bd522022c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -19,6 +19,8 @@ package org.apache.hudi.avro; import org.apache.avro.AvroRuntimeException; +import org.apache.avro.SchemaCompatibility; +import org.apache.avro.Conversions; import org.apache.avro.Conversions.DecimalConversion; import org.apache.avro.JsonProperties; import org.apache.avro.LogicalTypes; @@ -55,29 +57,44 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.math.BigDecimal; +import java.math.BigInteger; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.sql.Date; import java.sql.Timestamp; import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.HashMap; +import java.util.TimeZone; +import java.util.Iterator; + import java.util.stream.Collectors; +import static 
org.apache.avro.Schema.Type.UNION; + /** * Helper class to do common stuff across Avro. */ public class HoodieAvroUtils { - private static ThreadLocal reuseEncoder = ThreadLocal.withInitial(() -> null); + private static final ThreadLocal BINARY_ENCODER = ThreadLocal.withInitial(() -> null); + private static final ThreadLocal BINARY_DECODER = ThreadLocal.withInitial(() -> null); - private static ThreadLocal reuseDecoder = ThreadLocal.withInitial(() -> null); + private static final long MILLIS_PER_DAY = 86400000L; + + //Export for test + public static final Conversions.DecimalConversion DECIMAL_CONVERSION = new Conversions.DecimalConversion(); // As per https://avro.apache.org/docs/current/spec.html#names - private static String INVALID_AVRO_CHARS_IN_NAMES = "[^A-Za-z0-9_]"; - private static String INVALID_AVRO_FIRST_CHAR_IN_NAMES = "[^A-Za-z_]"; - private static String MASK_FOR_INVALID_CHARS_IN_NAMES = "__"; + private static final String INVALID_AVRO_CHARS_IN_NAMES = "[^A-Za-z0-9_]"; + private static final String INVALID_AVRO_FIRST_CHAR_IN_NAMES = "[^A-Za-z_]"; + private static final String MASK_FOR_INVALID_CHARS_IN_NAMES = "__"; // All metadata fields are optional strings. 
public static final Schema METADATA_FIELD_SCHEMA = @@ -93,10 +110,10 @@ public static byte[] avroToBytes(GenericRecord record) { } public static byte[] indexedRecordToBytes(T record) { - GenericDatumWriter writer = new GenericDatumWriter<>(record.getSchema()); + GenericDatumWriter writer = new GenericDatumWriter<>(record.getSchema(), ConvertingGenericData.INSTANCE); try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { - BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, reuseEncoder.get()); - reuseEncoder.set(encoder); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, BINARY_ENCODER.get()); + BINARY_ENCODER.set(encoder); writer.write(record, encoder); encoder.flush(); return out.toByteArray(); @@ -131,8 +148,8 @@ public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOEx * Convert serialized bytes back into avro record. */ public static GenericRecord bytesToAvro(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException { - BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, reuseDecoder.get()); - reuseDecoder.set(decoder); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, BINARY_DECODER.get()); + BINARY_DECODER.set(decoder); GenericDatumReader reader = new GenericDatumReader<>(writerSchema, readerSchema); return reader.read(null, decoder); } @@ -147,6 +164,18 @@ public static GenericRecord jsonBytesToAvro(byte[] bytes, Schema schema) throws return reader.read(null, jsonDecoder); } + /** + * True if the schema contains this name of field + */ + public static boolean containsFieldInSchema(Schema schema, String fieldName) { + try { + Field field = schema.getField(fieldName); + return field != null; + } catch (Exception e) { + return false; + } + } + public static boolean isMetadataField(String fieldName) { return HoodieRecord.COMMIT_TIME_METADATA_FIELD.equals(fieldName) || HoodieRecord.COMMIT_SEQNO_METADATA_FIELD.equals(fieldName) @@ -307,13 +336,19 @@ public 
static GenericRecord addOperationToRecord(GenericRecord record, HoodieOpe * @param newFieldNames Null Field names to be added */ public static Schema appendNullSchemaFields(Schema schema, List newFieldNames) { - List newFields = schema.getFields().stream() - .map(field -> new Field(field.name(), field.schema(), field.doc(), field.defaultVal())).collect(Collectors.toList()); + List newFields = new ArrayList<>(); for (String newField : newFieldNames) { newFields.add(new Schema.Field(newField, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE)); } + return createNewSchemaWithExtraFields(schema, newFields); + } + + public static Schema createNewSchemaWithExtraFields(Schema schema, List newFields) { + List fields = schema.getFields().stream() + .map(field -> new Field(field.name(), field.schema(), field.doc(), field.defaultVal())).collect(Collectors.toList()); + fields.addAll(newFields); Schema newSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError()); - newSchema.setFields(newFields); + newSchema.setFields(fields); return newSchema; } @@ -374,7 +409,7 @@ public static GenericRecord rewriteRecord(GenericRecord oldRecord, Schema newSch } } - if (!GenericData.get().validate(newSchema, newRecord)) { + if (!ConvertingGenericData.INSTANCE.validate(newSchema, newRecord)) { throw new SchemaCompatibilityException( "Unable to validate the rewritten record " + oldRecord + " against schema " + newSchema); } @@ -382,23 +417,13 @@ public static GenericRecord rewriteRecord(GenericRecord oldRecord, Schema newSch return newRecord; } - public static GenericRecord rewriteRecord(GenericRecord genericRecord, Schema newSchema, boolean copyOverMetaFields, GenericRecord fallbackRecord) { + public static GenericRecord rewriteRecordWithMetadata(GenericRecord genericRecord, Schema newSchema, String fileName) { GenericRecord newRecord = new GenericData.Record(newSchema); - boolean isSpecificRecord = genericRecord instanceof SpecificRecordBase; 
for (Schema.Field f : newSchema.getFields()) { - if (!(isSpecificRecord && isMetadataField(f.name()))) { - copyOldValueOrSetDefault(genericRecord, newRecord, f); - } - if (isMetadataField(f.name()) && copyOverMetaFields) { - // if meta field exists in primary generic record, copy over. - if (genericRecord.getSchema().getField(f.name()) != null) { - copyOldValueOrSetDefault(genericRecord, newRecord, f); - } else if (fallbackRecord != null && fallbackRecord.getSchema().getField(f.name()) != null) { - // if not, try to copy from the fallback record. - copyOldValueOrSetDefault(fallbackRecord, newRecord, f); - } - } + copyOldValueOrSetDefault(genericRecord, newRecord, f); } + // do not preserve FILENAME_METADATA_FIELD + newRecord.put(HoodieRecord.FILENAME_METADATA_FIELD_POS, fileName); if (!GenericData.get().validate(newSchema, newRecord)) { throw new SchemaCompatibilityException( "Unable to validate the rewritten record " + genericRecord + " against schema " + newSchema); @@ -422,9 +447,13 @@ private static void copyOldValueOrSetDefault(GenericRecord oldRecord, GenericRec if (fieldValue != null) { // In case field's value is a nested record, we have to rewrite it as well - Object newFieldValue = fieldValue instanceof GenericRecord - ? rewriteRecord((GenericRecord) fieldValue, resolveNullableSchema(field.schema())) - : fieldValue; + Object newFieldValue; + if (fieldValue instanceof GenericRecord) { + GenericRecord record = (GenericRecord) fieldValue; + newFieldValue = rewriteRecord(record, resolveUnionSchema(field.schema(), record.getSchema().getFullName())); + } else { + newFieldValue = fieldValue; + } newRecord.put(field.name(), newFieldValue); } else if (field.defaultVal() instanceof JsonProperties.Null) { newRecord.put(field.name(), null); @@ -511,6 +540,56 @@ public static Object getNestedFieldVal(GenericRecord record, String fieldName, b } } + /** + * Get schema for the given field and record. Field can be nested, denoted by dot notation. 
e.g: a.b.c + * + * @param record - record containing the value of the given field + * @param fieldName - name of the field + * @return + */ + public static Schema getNestedFieldSchemaFromRecord(GenericRecord record, String fieldName) { + String[] parts = fieldName.split("\\."); + GenericRecord valueNode = record; + int i = 0; + for (; i < parts.length; i++) { + String part = parts[i]; + Object val = valueNode.get(part); + + if (i == parts.length - 1) { + return resolveNullableSchema(valueNode.getSchema().getField(part).schema()); + } else { + if (!(val instanceof GenericRecord)) { + throw new HoodieException("Cannot find a record at part value :" + part); + } + valueNode = (GenericRecord) val; + } + } + throw new HoodieException("Failed to get schema. Not a valid field name: " + fieldName); + } + + + /** + * Get schema for the given field and write schema. Field can be nested, denoted by dot notation. e.g: a.b.c + * Use this method when record is not available. Otherwise, prefer to use {@link #getNestedFieldSchemaFromRecord(GenericRecord, String)} + * + * @param writeSchema - write schema of the record + * @param fieldName - name of the field + * @return + */ + public static Schema getNestedFieldSchemaFromWriteSchema(Schema writeSchema, String fieldName) { + String[] parts = fieldName.split("\\."); + int i = 0; + for (; i < parts.length; i++) { + String part = parts[i]; + Schema schema = writeSchema.getField(part).schema(); + + if (i == parts.length - 1) { + return resolveNullableSchema(schema); + } + } + throw new HoodieException("Failed to get schema. Not a valid field name: " + fieldName); + } + /** * Returns the string value of the given record {@code rec} and field {@code fieldName}. * The field and value both could be missing. 
@@ -646,7 +725,27 @@ public static Object getRecordColumnValues(HoodieRecord innerTypes = schema.getTypes(); + Schema nonNullType = + innerTypes.stream() + .filter(it -> it.getType() != Schema.Type.NULL && Objects.equals(it.getFullName(), fieldSchemaFullName)) + .findFirst() + .orElse(null); + + if (nonNullType == null) { + throw new AvroRuntimeException( + String.format("Unsupported Avro UNION type %s: Only UNION of a null type and a non-null type is supported", schema)); + } + + return nonNullType; + } + + public static Schema resolveNullableSchema(Schema schema) { if (schema.getType() != Schema.Type.UNION) { return schema; } @@ -665,4 +764,271 @@ private static Schema resolveNullableSchema(Schema schema) { return nonNullType; } + + /** + * Given a avro record with a given schema, rewrites it into the new schema while setting fields only from the new schema. + * support deep rewrite for nested record. + * This particular method does the following things : + * a) Create a new empty GenericRecord with the new schema. 
+ * b) For GenericRecord, copy over the data from the old schema to the new schema or set default values for all fields of this transformed schema + * + * @param oldRecord oldRecord to be rewritten + * @param newSchema newSchema used to rewrite oldRecord + * @return newRecord for new Schema + */ + public static GenericRecord rewriteRecordWithNewSchema(IndexedRecord oldRecord, Schema newSchema) { + Object newRecord = rewriteRecordWithNewSchema(oldRecord, oldRecord.getSchema(), newSchema); + return (GenericData.Record) newRecord; + } + + private static Object rewriteRecordWithNewSchema(Object oldRecord, Schema oldSchema, Schema newSchema) { + if (oldRecord == null) { + return null; + } + switch (newSchema.getType()) { + case RECORD: + if (!(oldRecord instanceof IndexedRecord)) { + throw new IllegalArgumentException("cannot rewrite record with different type"); + } + IndexedRecord indexedRecord = (IndexedRecord) oldRecord; + List fields = newSchema.getFields(); + Map helper = new HashMap<>(); + + for (int i = 0; i < fields.size(); i++) { + Schema.Field field = fields.get(i); + if (oldSchema.getField(field.name()) != null) { + Schema.Field oldField = oldSchema.getField(field.name()); + helper.put(i, rewriteRecordWithNewSchema(indexedRecord.get(oldField.pos()), oldField.schema(), fields.get(i).schema())); + } + } + GenericData.Record newRecord = new GenericData.Record(newSchema); + for (int i = 0; i < fields.size(); i++) { + if (helper.containsKey(i)) { + newRecord.put(i, helper.get(i)); + } else { + if (fields.get(i).defaultVal() instanceof JsonProperties.Null) { + newRecord.put(i, null); + } else { + newRecord.put(i, fields.get(i).defaultVal()); + } + } + } + return newRecord; + case ARRAY: + if (!(oldRecord instanceof Collection)) { + throw new IllegalArgumentException("cannot rewrite record with different type"); + } + Collection array = (Collection)oldRecord; + List newArray = new ArrayList(); + for (Object element : array) { + 
newArray.add(rewriteRecordWithNewSchema(element, oldSchema.getElementType(), newSchema.getElementType())); + } + return newArray; + case MAP: + if (!(oldRecord instanceof Map)) { + throw new IllegalArgumentException("cannot rewrite record with different type"); + } + Map map = (Map) oldRecord; + Map newMap = new HashMap<>(); + for (Map.Entry entry : map.entrySet()) { + newMap.put(entry.getKey(), rewriteRecordWithNewSchema(entry.getValue(), oldSchema.getValueType(), newSchema.getValueType())); + } + return newMap; + case UNION: + return rewriteRecordWithNewSchema(oldRecord, getActualSchemaFromUnion(oldSchema, oldRecord), getActualSchemaFromUnion(newSchema, oldRecord)); + default: + return rewritePrimaryType(oldRecord, oldSchema, newSchema); + } + } + + private static Object rewritePrimaryType(Object oldValue, Schema oldSchema, Schema newSchema) { + Schema realOldSchema = oldSchema; + if (realOldSchema.getType() == UNION) { + realOldSchema = getActualSchemaFromUnion(oldSchema, oldValue); + } + if (realOldSchema.getType() == newSchema.getType()) { + switch (realOldSchema.getType()) { + case NULL: + case BOOLEAN: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case BYTES: + case STRING: + return oldValue; + case FIXED: + // fixed size and name must match: + if (!SchemaCompatibility.schemaNameEquals(realOldSchema, newSchema) || realOldSchema.getFixedSize() != newSchema.getFixedSize()) { + // deal with the precision change for decimalType + if (realOldSchema.getLogicalType() instanceof LogicalTypes.Decimal) { + final byte[] bytes; + bytes = ((GenericFixed) oldValue).bytes(); + LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) realOldSchema.getLogicalType(); + BigDecimal bd = new BigDecimal(new BigInteger(bytes), decimal.getScale()).setScale(((LogicalTypes.Decimal) newSchema.getLogicalType()).getScale()); + return DECIMAL_CONVERSION.toFixed(bd, newSchema, newSchema.getLogicalType()); + } + } else { + return oldValue; + } + return oldValue; + default: + throw 
new AvroRuntimeException("Unknown schema type: " + newSchema.getType()); + } + } else { + return rewritePrimaryTypeWithDiffSchemaType(oldValue, realOldSchema, newSchema); + } + } + + private static Object rewritePrimaryTypeWithDiffSchemaType(Object oldValue, Schema oldSchema, Schema newSchema) { + switch (newSchema.getType()) { + case NULL: + case BOOLEAN: + break; + case INT: + if (newSchema.getLogicalType() == LogicalTypes.date() && oldSchema.getType() == Schema.Type.STRING) { + return fromJavaDate(java.sql.Date.valueOf(oldValue.toString())); + } + break; + case LONG: + if (oldSchema.getType() == Schema.Type.INT) { + return ((Integer) oldValue).longValue(); + } + break; + case FLOAT: + if ((oldSchema.getType() == Schema.Type.INT) + || (oldSchema.getType() == Schema.Type.LONG)) { + return oldSchema.getType() == Schema.Type.INT ? ((Integer) oldValue).floatValue() : ((Long) oldValue).floatValue(); + } + break; + case DOUBLE: + if (oldSchema.getType() == Schema.Type.FLOAT) { + // java float cannot convert to double directly, deal with float precision change + return Double.valueOf(oldValue + ""); + } else if (oldSchema.getType() == Schema.Type.INT) { + return ((Integer) oldValue).doubleValue(); + } else if (oldSchema.getType() == Schema.Type.LONG) { + return ((Long) oldValue).doubleValue(); + } + break; + case BYTES: + if (oldSchema.getType() == Schema.Type.STRING) { + return (oldValue.toString()).getBytes(StandardCharsets.UTF_8); + } + break; + case STRING: + if (oldSchema.getType() == Schema.Type.BYTES) { + return String.valueOf(((byte[]) oldValue)); + } + if (oldSchema.getLogicalType() == LogicalTypes.date()) { + return toJavaDate((Integer) oldValue).toString(); + } + if (oldSchema.getType() == Schema.Type.INT + || oldSchema.getType() == Schema.Type.LONG + || oldSchema.getType() == Schema.Type.FLOAT + || oldSchema.getType() == Schema.Type.DOUBLE) { + return oldValue.toString(); + } + if (oldSchema.getType() == Schema.Type.FIXED && oldSchema.getLogicalType() 
instanceof LogicalTypes.Decimal) { + final byte[] bytes; + bytes = ((GenericFixed) oldValue).bytes(); + LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) oldSchema.getLogicalType(); + BigDecimal bd = new BigDecimal(new BigInteger(bytes), decimal.getScale()); + return bd.toString(); + } + break; + case FIXED: + // deal with decimal Type + if (newSchema.getLogicalType() instanceof LogicalTypes.Decimal) { + // TODO: support more types + if (oldSchema.getType() == Schema.Type.STRING + || oldSchema.getType() == Schema.Type.DOUBLE + || oldSchema.getType() == Schema.Type.INT + || oldSchema.getType() == Schema.Type.LONG + || oldSchema.getType() == Schema.Type.FLOAT) { + LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) newSchema.getLogicalType(); + BigDecimal bigDecimal = null; + if (oldSchema.getType() == Schema.Type.STRING) { + bigDecimal = new java.math.BigDecimal(oldValue.toString()) + .setScale(decimal.getScale()); + } else { + // Due to Java, there will be precision problems in direct conversion, we should use string instead of use double + bigDecimal = new java.math.BigDecimal(oldValue.toString()) + .setScale(decimal.getScale()); + } + return DECIMAL_CONVERSION.toFixed(bigDecimal, newSchema, newSchema.getLogicalType()); + } + } + break; + default: + } + throw new AvroRuntimeException(String.format("cannot support rewrite value for schema type: %s since the old schema type is: %s", newSchema, oldSchema)); + } + + // convert days to Date + private static java.sql.Date toJavaDate(int days) { + long localMillis = Math.multiplyExact(days, MILLIS_PER_DAY); + int timeZoneOffset; + TimeZone defaultTimeZone = TimeZone.getDefault(); + if (defaultTimeZone instanceof sun.util.calendar.ZoneInfo) { + timeZoneOffset = ((sun.util.calendar.ZoneInfo) defaultTimeZone).getOffsetsByWall(localMillis, null); + } else { + timeZoneOffset = defaultTimeZone.getOffset(localMillis - defaultTimeZone.getRawOffset()); + } + return new java.sql.Date(localMillis - timeZoneOffset); + } + + // 
convert Date to days + private static int fromJavaDate(Date date) { + long millisUtc = date.getTime(); + long millisLocal = millisUtc + TimeZone.getDefault().getOffset(millisUtc); + int julianDays = Math.toIntExact(Math.floorDiv(millisLocal, MILLIS_PER_DAY)); + return julianDays; + } + + private static Schema getActualSchemaFromUnion(Schema schema, Object data) { + Schema actualSchema; + if (!schema.getType().equals(UNION)) { + return schema; + } + if (schema.getTypes().size() == 2 + && schema.getTypes().get(0).getType() == Schema.Type.NULL) { + actualSchema = schema.getTypes().get(1); + } else if (schema.getTypes().size() == 2 + && schema.getTypes().get(1).getType() == Schema.Type.NULL) { + actualSchema = schema.getTypes().get(0); + } else if (schema.getTypes().size() == 1) { + actualSchema = schema.getTypes().get(0); + } else { + // deal complex union. this should not happened in hoodie, + // since flink/spark do not write this type. + int i = GenericData.get().resolveUnion(schema, data); + actualSchema = schema.getTypes().get(i); + } + return actualSchema; + } + + /** + * Given avro records, rewrites them with new schema. 
+ * + * @param oldRecords oldRecords to be rewrite + * @param newSchema newSchema used to rewrite oldRecord + * @return a iterator of rewrote GeneriRcords + */ + public static Iterator rewriteRecordWithNewSchema(Iterator oldRecords, Schema newSchema) { + if (oldRecords == null || newSchema == null) { + return Collections.emptyIterator(); + } + return new Iterator() { + @Override + public boolean hasNext() { + return oldRecords.hasNext(); + } + + @Override + public GenericRecord next() { + return rewriteRecordWithNewSchema(oldRecords.next(), newSchema); + } + }; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java index 18827c66bf096..c3920211ae948 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java @@ -27,6 +27,7 @@ import org.apache.parquet.schema.MessageType; import java.util.HashMap; +import java.util.Map; /** * Wrap AvroWriterSupport for plugging in the bloom filter. 
@@ -36,6 +37,7 @@ public class HoodieAvroWriteSupport extends AvroWriteSupport { private Option bloomFilterOpt; private String minRecordKey; private String maxRecordKey; + private Map footerMetadata = new HashMap<>(); public static final String OLD_HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY = "com.uber.hoodie.bloomfilter"; public static final String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY = "org.apache.hudi.bloomfilter"; @@ -44,24 +46,23 @@ public class HoodieAvroWriteSupport extends AvroWriteSupport { public static final String HOODIE_BLOOM_FILTER_TYPE_CODE = "hoodie_bloom_filter_type_code"; public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema, Option bloomFilterOpt) { - super(schema, avroSchema); + super(schema, avroSchema, ConvertingGenericData.INSTANCE); this.bloomFilterOpt = bloomFilterOpt; } @Override public WriteSupport.FinalizedWriteContext finalizeWrite() { - HashMap extraMetaData = new HashMap<>(); if (bloomFilterOpt.isPresent()) { - extraMetaData.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilterOpt.get().serializeToString()); + footerMetadata.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilterOpt.get().serializeToString()); if (minRecordKey != null && maxRecordKey != null) { - extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey); - extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey); + footerMetadata.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey); + footerMetadata.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey); } if (bloomFilterOpt.get().getBloomFilterTypeCode().name().contains(HoodieDynamicBoundedBloomFilter.TYPE_CODE_PREFIX)) { - extraMetaData.put(HOODIE_BLOOM_FILTER_TYPE_CODE, bloomFilterOpt.get().getBloomFilterTypeCode().name()); + footerMetadata.put(HOODIE_BLOOM_FILTER_TYPE_CODE, bloomFilterOpt.get().getBloomFilterTypeCode().name()); } } - return new WriteSupport.FinalizedWriteContext(extraMetaData); + return new WriteSupport.FinalizedWriteContext(footerMetadata); } public void add(String recordKey) { @@ -80,4 
+81,8 @@ public void add(String recordKey) { } } } + + public void addFooterMetadata(String key, String value) { + footerMetadata.put(key, value); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/HoodieCleanStat.java b/hudi-common/src/main/java/org/apache/hudi/common/HoodieCleanStat.java index e9de502f78bbf..fa5d80419434b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/HoodieCleanStat.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/HoodieCleanStat.java @@ -47,19 +47,22 @@ public class HoodieCleanStat implements Serializable { private final List failedDeleteBootstrapBaseFiles; // Earliest commit that was retained in this clean private final String earliestCommitToRetain; + // set to true if partition is deleted + private final boolean isPartitionDeleted; public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath, List deletePathPatterns, List successDeleteFiles, List failedDeleteFiles, String earliestCommitToRetain) { this(policy, partitionPath, deletePathPatterns, successDeleteFiles, failedDeleteFiles, earliestCommitToRetain, CollectionUtils.createImmutableList(), CollectionUtils.createImmutableList(), - CollectionUtils.createImmutableList()); + CollectionUtils.createImmutableList(), false); } public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath, List deletePathPatterns, List successDeleteFiles, List failedDeleteFiles, String earliestCommitToRetain, List deleteBootstrapBasePathPatterns, List successDeleteBootstrapBaseFiles, - List failedDeleteBootstrapBaseFiles) { + List failedDeleteBootstrapBaseFiles, + boolean isPartitionDeleted) { this.policy = policy; this.partitionPath = partitionPath; this.deletePathPatterns = deletePathPatterns; @@ -69,6 +72,7 @@ public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath, List deleteBootstrapBasePathPatterns; private List successDeleteBootstrapBaseFiles; private List failedDeleteBootstrapBaseFiles; + private boolean 
isPartitionDeleted; public Builder withPolicy(HoodieCleaningPolicy policy) { this.policy = policy; @@ -172,10 +181,15 @@ public Builder withEarliestCommitRetained(Option earliestCommitTo return this; } + public Builder isPartitionDeleted(boolean isPartitionDeleted) { + this.isPartitionDeleted = isPartitionDeleted; + return this; + } + public HoodieCleanStat build() { return new HoodieCleanStat(policy, partitionPath, deletePathPatterns, successDeleteFiles, failedDeleteFiles, earliestCommitToRetain, deleteBootstrapBasePathPatterns, successDeleteBootstrapBaseFiles, - failedDeleteBootstrapBaseFiles); + failedDeleteBootstrapBaseFiles, isPartitionDeleted); } } @@ -190,7 +204,8 @@ public String toString() { + ", earliestCommitToRetain='" + earliestCommitToRetain + ", deleteBootstrapBasePathPatterns=" + deleteBootstrapBasePathPatterns + ", successDeleteBootstrapBaseFiles=" + successDeleteBootstrapBaseFiles - + ", failedDeleteBootstrapBaseFiles=" + failedDeleteBootstrapBaseFiles + '\'' + + ", failedDeleteBootstrapBaseFiles=" + failedDeleteBootstrapBaseFiles + + ", isPartitionDeleted=" + isPartitionDeleted + '\'' + '}'; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java index b0278319fce46..2403ffd995750 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java @@ -78,7 +78,7 @@ public SimpleBloomFilter(String serString) { @Override public void add(String key) { if (key == null) { - throw new NullPointerException("Key cannot by null"); + throw new NullPointerException("Key cannot be null"); } filter.add(new Key(key.getBytes(StandardCharsets.UTF_8))); } @@ -86,7 +86,7 @@ public void add(String key) { @Override public boolean mightContain(String key) { if (key == null) { - throw new NullPointerException("Key cannot by null"); + throw new 
NullPointerException("Key cannot be null"); } return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8))); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java index 3700d01a60ea6..b8a2c202cb6c9 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java @@ -33,10 +33,12 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.storage.HoodieHFileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.CellComparatorImpl; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; @@ -178,9 +180,7 @@ private static String getUserKeyFromCellKey(String cellKey) { private static HFile.Reader createReader(String hFilePath, Configuration conf, FileSystem fileSystem) { try { LOG.info("Opening HFile for reading :" + hFilePath); - HFile.Reader reader = HFile.createReader(fileSystem, new HFilePathForReader(hFilePath), - new CacheConfig(conf), conf); - return reader; + return HoodieHFileUtils.createHFileReader(fileSystem, new HFilePathForReader(hFilePath), new CacheConfig(conf), conf); } catch (IOException ioe) { throw new HoodieIOException(ioe.getMessage(), ioe); } @@ -259,7 +259,7 @@ private void initIndexInfo() { private HoodieBootstrapIndexInfo fetchBootstrapIndexInfo() throws IOException { return TimelineMetadataUtils.deserializeAvroMetadata( - partitionIndexReader().loadFileInfo().get(INDEX_INFO_KEY), + partitionIndexReader().getHFileInfo().get(INDEX_INFO_KEY), 
HoodieBootstrapIndexInfo.class); } @@ -306,7 +306,7 @@ private List getAllKeys(HFileScanner scanner, Function convert try { boolean available = scanner.seekTo(); while (available) { - keys.add(converter.apply(getUserKeyFromCellKey(CellUtil.getCellKeyAsString(scanner.getKeyValue())))); + keys.add(converter.apply(getUserKeyFromCellKey(CellUtil.getCellKeyAsString(scanner.getCell())))); available = scanner.next(); } } catch (IOException ioe) { @@ -528,13 +528,13 @@ public void close() { @Override public void begin() { try { - HFileContext meta = new HFileContextBuilder().build(); + HFileContext meta = new HFileContextBuilder().withCellComparator(new HoodieKVComparator()).build(); this.indexByPartitionWriter = HFile.getWriterFactory(metaClient.getHadoopConf(), new CacheConfig(metaClient.getHadoopConf())).withPath(metaClient.getFs(), indexByPartitionPath) - .withFileContext(meta).withComparator(new HoodieKVComparator()).create(); + .withFileContext(meta).create(); this.indexByFileIdWriter = HFile.getWriterFactory(metaClient.getHadoopConf(), new CacheConfig(metaClient.getHadoopConf())).withPath(metaClient.getFs(), indexByFileIdPath) - .withFileContext(meta).withComparator(new HoodieKVComparator()).create(); + .withFileContext(meta).create(); } catch (IOException ioe) { throw new HoodieIOException(ioe.getMessage(), ioe); } @@ -581,6 +581,6 @@ public String getName() { * This class is explicitly used as Key Comparator to workaround hard coded * legacy format class names inside HBase. Otherwise we will face issues with shading. 
*/ - public static class HoodieKVComparator extends KeyValue.KVComparator { + public static class HoodieKVComparator extends CellComparatorImpl { } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java index c4308f79d710c..c77e292b4775f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java @@ -38,6 +38,8 @@ public class HoodieConfig implements Serializable { private static final Logger LOG = LogManager.getLogger(HoodieConfig.class); + protected static final String CONFIG_VALUES_DELIMITER = ","; + public static HoodieConfig create(FSDataInputStream inputStream) throws IOException { HoodieConfig config = new HoodieConfig(); config.props.load(inputStream); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index 7d964f3582d69..14a055cb17cc0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.config; import org.apache.hudi.common.engine.EngineType; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieNotSupportedException; import javax.annotation.concurrent.Immutable; @@ -26,6 +27,7 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; +import java.util.List; import java.util.Properties; /** @@ -71,6 +73,13 @@ public final class HoodieMetadataConfig extends HoodieConfig { .sinceVersion("0.7.0") .withDocumentation("Enable asynchronous cleaning for metadata table"); + // Async index + public static final ConfigProperty ASYNC_INDEX_ENABLE = ConfigProperty + .key(METADATA_PREFIX + 
".index.async") + .defaultValue(false) + .sinceVersion("0.11.0") + .withDocumentation("Enable asynchronous indexing of metadata table."); + // Maximum delta commits before compaction occurs public static final ConfigProperty COMPACT_NUM_DELTA_COMMITS = ConfigProperty .key(METADATA_PREFIX + ".compact.max.delta.commits") @@ -144,6 +153,12 @@ public final class HoodieMetadataConfig extends HoodieConfig { + "log files and read parallelism in the bloom filter index partition. The recommendation is to size the " + "file group count such that the base files are under 1GB."); + public static final ConfigProperty BLOOM_FILTER_INDEX_PARALLELISM = ConfigProperty + .key(METADATA_PREFIX + ".index.bloom.filter.parallelism") + .defaultValue(200) + .sinceVersion("0.11.0") + .withDocumentation("Parallelism to use for generating bloom filter index in metadata table."); + public static final ConfigProperty ENABLE_METADATA_INDEX_COLUMN_STATS = ConfigProperty .key(METADATA_PREFIX + ".index.column.stats.enable") .defaultValue(false) @@ -160,21 +175,31 @@ public final class HoodieMetadataConfig extends HoodieConfig { + "log files and read parallelism in the column stats index partition. The recommendation is to size the " + "file group count such that the base files are under 1GB."); - public static final ConfigProperty ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS = ConfigProperty - .key(METADATA_PREFIX + ".index.column.stats.all_columns.enable") - .defaultValue(true) - .sinceVersion("0.11.0") - .withDocumentation("Enable indexing column ranges of user data files for all columns under " - + "metadata table key lookups. When enabled, metadata table will have a partition to " - + "store the column ranges and will be used for pruning files during the index lookups. 
" - + "Only applies if " + ENABLE_METADATA_INDEX_COLUMN_STATS.key() + " is enabled."); - public static final ConfigProperty COLUMN_STATS_INDEX_PARALLELISM = ConfigProperty .key(METADATA_PREFIX + ".index.column.stats.parallelism") .defaultValue(10) .sinceVersion("0.11.0") .withDocumentation("Parallelism to use, when generating column stats index."); + public static final ConfigProperty COLUMN_STATS_INDEX_FOR_COLUMNS = ConfigProperty + .key(METADATA_PREFIX + ".index.column.stats.column.list") + .noDefaultValue() + .sinceVersion("0.11.0") + .withDocumentation("Comma-separated list of columns for which column stats index will be built. If not set, all columns will be indexed"); + + public static final ConfigProperty BLOOM_FILTER_INDEX_FOR_COLUMNS = ConfigProperty + .key(METADATA_PREFIX + ".index.bloom.filter.column.list") + .noDefaultValue() + .sinceVersion("0.11.0") + .withDocumentation("Comma-separated list of columns for which bloom filter index will be built. If not set, only record key will be indexed."); + + public static final ConfigProperty METADATA_INDEX_CHECK_TIMEOUT_SECONDS = ConfigProperty + .key(METADATA_PREFIX + ".index.check.timeout.seconds") + .defaultValue(900) + .sinceVersion("0.11.0") + .withDocumentation("After the async indexer has finished indexing upto the base instant, it will ensure that all inflight writers " + + "reliably write index updates as well. 
If this timeout expires, then the indexer will abort itself safely."); + public static final ConfigProperty POPULATE_META_FIELDS = ConfigProperty .key(METADATA_PREFIX + ".populate.meta.fields") .defaultValue(false) @@ -217,8 +242,12 @@ public boolean isColumnStatsIndexEnabled() { return getBooleanOrDefault(ENABLE_METADATA_INDEX_COLUMN_STATS); } - public boolean isMetadataColumnStatsIndexForAllColumnsEnabled() { - return getBooleanOrDefault(ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS); + public List getColumnsEnabledForColumnStatsIndex() { + return StringUtils.split(getString(COLUMN_STATS_INDEX_FOR_COLUMNS), CONFIG_VALUES_DELIMITER); + } + + public List getColumnsEnabledForBloomFilterIndex() { + return StringUtils.split(getString(BLOOM_FILTER_INDEX_FOR_COLUMNS), CONFIG_VALUES_DELIMITER); } public int getBloomFilterIndexFileGroupCount() { @@ -229,10 +258,18 @@ public int getColumnStatsIndexFileGroupCount() { return getIntOrDefault(METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT); } + public int getBloomFilterIndexParallelism() { + return getIntOrDefault(BLOOM_FILTER_INDEX_PARALLELISM); + } + public int getColumnStatsIndexParallelism() { return getIntOrDefault(COLUMN_STATS_INDEX_PARALLELISM); } + public int getIndexingCheckTimeoutSeconds() { + return getIntOrDefault(METADATA_INDEX_CHECK_TIMEOUT_SECONDS); + } + public boolean enableMetrics() { return getBoolean(METRICS_ENABLE); } @@ -241,8 +278,8 @@ public String getDirectoryFilterRegex() { return getString(DIR_FILTER_REGEX); } - public boolean enableFullScan() { - return getBoolean(ENABLE_FULL_SCAN_LOG_FILES); + public boolean allowFullScan() { + return getBooleanOrDefault(ENABLE_FULL_SCAN_LOG_FILES); } public boolean populateMetaFields() { @@ -285,6 +322,11 @@ public Builder withMetadataIndexBloomFilterFileGroups(int fileGroupCount) { return this; } + public Builder withBloomFilterIndexParallelism(int parallelism) { + metadataConfig.setValue(BLOOM_FILTER_INDEX_PARALLELISM, String.valueOf(parallelism)); + return 
this; + } + public Builder withMetadataIndexColumnStats(boolean enable) { metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS, String.valueOf(enable)); return this; @@ -300,8 +342,18 @@ public Builder withColumnStatsIndexParallelism(int parallelism) { return this; } - public Builder withMetadataIndexForAllColumns(boolean enable) { - metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS, String.valueOf(enable)); + public Builder withColumnStatsIndexForColumns(String columns) { + metadataConfig.setValue(COLUMN_STATS_INDEX_FOR_COLUMNS, columns); + return this; + } + + public Builder withBloomFilterIndexForColumns(String columns) { + metadataConfig.setValue(BLOOM_FILTER_INDEX_FOR_COLUMNS, columns); + return this; + } + + public Builder withIndexingCheckTimeout(int timeoutInSeconds) { + metadataConfig.setValue(METADATA_INDEX_CHECK_TIMEOUT_SECONDS, String.valueOf(timeoutInSeconds)); return this; } @@ -320,6 +372,11 @@ public Builder withAsyncClean(boolean asyncClean) { return this; } + public Builder withAsyncIndex(boolean asyncIndex) { + metadataConfig.setValue(ASYNC_INDEX_ENABLE, String.valueOf(asyncIndex)); + return this; + } + public Builder withMaxNumDeltaCommitsBeforeCompaction(int maxNumDeltaCommitsBeforeCompaction) { metadataConfig.setValue(COMPACT_NUM_DELTA_COMMITS, String.valueOf(maxNumDeltaCommitsBeforeCompaction)); return this; @@ -371,6 +428,11 @@ public Builder withEngineType(EngineType engineType) { return this; } + public Builder withProperties(Properties properties) { + this.metadataConfig.getProps().putAll(properties); + return this; + } + public HoodieMetadataConfig build() { metadataConfig.setDefaultValue(ENABLE, getDefaultMetadataEnable(engineType)); metadataConfig.setDefaults(HoodieMetadataConfig.class.getName()); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/engine/EngineProperty.java b/hudi-common/src/main/java/org/apache/hudi/common/engine/EngineProperty.java index 5e9a516ec4f81..36e7594937b04 100644 
--- a/hudi-common/src/main/java/org/apache/hudi/common/engine/EngineProperty.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/engine/EngineProperty.java @@ -26,6 +26,7 @@ public enum EngineProperty { EMBEDDED_SERVER_HOST, // Pool/queue to use to run compaction. COMPACTION_POOL_NAME, + CLUSTERING_POOL_NAME, TOTAL_CORES_PER_EXECUTOR, // Amount of total memory available to each engine executor TOTAL_MEMORY_AVAILABLE, diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java index 7c9b7cc806fa4..1bde88d3bb647 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java @@ -28,6 +28,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; @@ -229,7 +230,7 @@ public static String getRelativePartitionPath(Path basePath, Path fullPartitionP /** * Obtain all the partition paths, that are present in this table, denoted by presence of - * {@link HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}. + * {@link HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE_PREFIX}. * * If the basePathStr is a subdirectory of .hoodie folder then we assume that the partitions of an internal * table (a hoodie table within the .hoodie directory) are to be obtained. 
@@ -245,7 +246,7 @@ public static List getAllFoldersWithPartitionMetaFile(FileSystem fs, Str final List partitions = new ArrayList<>(); processFiles(fs, basePathStr, (locatedFileStatus) -> { Path filePath = locatedFileStatus.getPath(); - if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) { + if (filePath.getName().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX)) { partitions.add(getRelativePartitionPath(basePath, filePath.getParent())); } return true; @@ -585,12 +586,21 @@ public static Long getSizeInMB(long sizeInBytes) { } public static Path getPartitionPath(String basePath, String partitionPath) { - return getPartitionPath(new Path(basePath), partitionPath); + if (StringUtils.isNullOrEmpty(partitionPath)) { + return new Path(basePath); + } + + // NOTE: We have to chop leading "/" to make sure Hadoop does not treat it like + // absolute path + String properPartitionPath = partitionPath.startsWith("/") + ? partitionPath.substring(1) + : partitionPath; + return getPartitionPath(new Path(basePath), properPartitionPath); } public static Path getPartitionPath(Path basePath, String partitionPath) { // FOr non-partitioned table, return only base-path - return ((partitionPath == null) || (partitionPath.isEmpty())) ? basePath : new Path(basePath, partitionPath); + return StringUtils.isNullOrEmpty(partitionPath) ? basePath : new Path(basePath, partitionPath); } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/DeleteRecord.java b/hudi-common/src/main/java/org/apache/hudi/common/model/DeleteRecord.java new file mode 100644 index 0000000000000..003b591c20c05 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/DeleteRecord.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.model; + +import java.io.Serializable; +import java.util.Objects; + +/** + * Delete record is a combination of HoodieKey and ordering value. + * The record is used for {@link org.apache.hudi.common.table.log.block.HoodieDeleteBlock} + * to support per-record deletions. The deletion block is always appended after the data block, + * we need to keep the ordering val to combine with the data records when merging, or the data loss + * may occur if there are intermediate deletions for the inputs + * (a new INSERT comes after a DELETE in one input batch). + */ +public class DeleteRecord implements Serializable { + private static final long serialVersionUID = 1L; + + /** + * The record key and partition path. + */ + private final HoodieKey hoodieKey; + + /** + * For purposes of preCombining. 
+ */ + private final Comparable orderingVal; + + private DeleteRecord(HoodieKey hoodieKey, Comparable orderingVal) { + this.hoodieKey = hoodieKey; + this.orderingVal = orderingVal; + } + + public static DeleteRecord create(HoodieKey hoodieKey) { + return create(hoodieKey, 0); + } + + public static DeleteRecord create(String recordKey, String partitionPath) { + return create(recordKey, partitionPath, 0); + } + + public static DeleteRecord create(String recordKey, String partitionPath, Comparable orderingVal) { + return create(new HoodieKey(recordKey, partitionPath), orderingVal); + } + + public static DeleteRecord create(HoodieKey hoodieKey, Comparable orderingVal) { + return new DeleteRecord(hoodieKey, orderingVal); + } + + public String getRecordKey() { + return hoodieKey.getRecordKey(); + } + + public String getPartitionPath() { + return hoodieKey.getPartitionPath(); + } + + public HoodieKey getHoodieKey() { + return hoodieKey; + } + + public Comparable getOrderingValue() { + return orderingVal; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof DeleteRecord)) { + return false; + } + DeleteRecord that = (DeleteRecord) o; + return this.hoodieKey.equals(that.hoodieKey) && this.orderingVal.equals(that.orderingVal); + } + + @Override + public int hashCode() { + return Objects.hash(this.hoodieKey, this.orderingVal); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("DeleteRecord {"); + sb.append(" key=").append(hoodieKey); + sb.append(" orderingVal=").append(this.orderingVal); + sb.append('}'); + return sb.toString(); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroPayload.java index 04a873c98f29a..3fbcb8a620e0e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroPayload.java +++ 
b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroPayload.java @@ -36,17 +36,16 @@ public class HoodieAvroPayload implements HoodieRecordPayload // Store the GenericRecord converted to bytes - 1) Doesn't store schema hence memory efficient 2) Makes the payload // java serializable private final byte[] recordBytes; + private final Comparable orderingVal; public HoodieAvroPayload(GenericRecord record, Comparable orderingVal) { - this(Option.of(record)); + this.recordBytes = record == null ? new byte[0] : HoodieAvroUtils.avroToBytes(record); + this.orderingVal = orderingVal; } public HoodieAvroPayload(Option record) { - if (record.isPresent()) { - this.recordBytes = HoodieAvroUtils.avroToBytes(record.get()); - } else { - this.recordBytes = new byte[0]; - } + this.recordBytes = record.isPresent() ? HoodieAvroUtils.avroToBytes(record.get()) : new byte[0]; + this.orderingVal = 0; } @Override @@ -71,4 +70,9 @@ public Option getInsertValue(Schema schema) throws IOException { public byte[] getRecordBytes() { return recordBytes; } + + @Override + public Comparable getOrderingValue() { + return orderingVal; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java index d098c4ff7c2b2..e3c5a70d5cf16 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java @@ -18,41 +18,38 @@ package org.apache.hudi.common.model; +import javax.annotation.Nullable; import java.io.Serializable; -import java.util.Arrays; -import java.util.Comparator; import java.util.Objects; -import java.util.function.BiFunction; /** - * Hoodie Range metadata. 
+ * Hoodie metadata for the column range of data stored in columnar format (like Parquet) + * + * NOTE: {@link Comparable} is used as raw-type so that we can handle polymorphism, where + * caller apriori is not aware of the type {@link HoodieColumnRangeMetadata} is + * associated with */ -public class HoodieColumnRangeMetadata implements Serializable { +@SuppressWarnings("rawtype") +public class HoodieColumnRangeMetadata implements Serializable { private final String filePath; private final String columnName; + @Nullable private final T minValue; + @Nullable private final T maxValue; private final long nullCount; private final long valueCount; private final long totalSize; private final long totalUncompressedSize; - public static final BiFunction, HoodieColumnRangeMetadata, HoodieColumnRangeMetadata> COLUMN_RANGE_MERGE_FUNCTION = - (oldColumnRange, newColumnRange) -> new HoodieColumnRangeMetadata<>( - newColumnRange.getFilePath(), - newColumnRange.getColumnName(), - (Comparable) Arrays.asList(oldColumnRange.getMinValue(), newColumnRange.getMinValue()) - .stream().filter(Objects::nonNull).min(Comparator.naturalOrder()).orElse(null), - (Comparable) Arrays.asList(oldColumnRange.getMinValue(), newColumnRange.getMinValue()) - .stream().filter(Objects::nonNull).max(Comparator.naturalOrder()).orElse(null), - oldColumnRange.getNullCount() + newColumnRange.getNullCount(), - oldColumnRange.getValueCount() + newColumnRange.getValueCount(), - oldColumnRange.getTotalSize() + newColumnRange.getTotalSize(), - oldColumnRange.getTotalUncompressedSize() + newColumnRange.getTotalUncompressedSize() - ); - - public HoodieColumnRangeMetadata(final String filePath, final String columnName, final T minValue, final T maxValue, - final long nullCount, long valueCount, long totalSize, long totalUncompressedSize) { + private HoodieColumnRangeMetadata(String filePath, + String columnName, + @Nullable T minValue, + @Nullable T maxValue, + long nullCount, + long valueCount, + long totalSize, + 
long totalUncompressedSize) { this.filePath = filePath; this.columnName = columnName; this.minValue = minValue; @@ -71,10 +68,12 @@ public String getColumnName() { return this.columnName; } + @Nullable public T getMinValue() { return this.minValue; } + @Nullable public T getMaxValue() { return this.maxValue; } @@ -133,17 +132,20 @@ public String toString() { + '}'; } - /** - * Statistics that is collected in {@link org.apache.hudi.metadata.MetadataPartitionType#COLUMN_STATS} index. - */ - public static final class Stats { - public static final String VALUE_COUNT = "value_count"; - public static final String NULL_COUNT = "null_count"; - public static final String MIN = "min"; - public static final String MAX = "max"; - public static final String TOTAL_SIZE = "total_size"; - public static final String TOTAL_UNCOMPRESSED_SIZE = "total_uncompressed_size"; - - private Stats() { } + public static > HoodieColumnRangeMetadata create(String filePath, + String columnName, + @Nullable T minValue, + @Nullable T maxValue, + long nullCount, + long valueCount, + long totalSize, + long totalUncompressedSize) { + return new HoodieColumnRangeMetadata<>(filePath, columnName, minValue, maxValue, nullCount, valueCount, totalSize, totalUncompressedSize); + } + + @SuppressWarnings("rawtype") + public static HoodieColumnRangeMetadata stub(String filePath, + String columnName) { + return new HoodieColumnRangeMetadata<>(filePath, columnName, null, null, -1, -1, -1, -1); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java index c57965d727210..53ceb00409ac7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java @@ -130,13 +130,27 @@ public WriteOperationType getOperationType() { public HashMap getFileIdAndFullPaths(String basePath) { HashMap 
fullPaths = new HashMap<>(); for (Map.Entry entry : getFileIdAndRelativePaths().entrySet()) { - String fullPath = - (entry.getValue() != null) ? (FSUtils.getPartitionPath(basePath, entry.getValue())).toString() : null; + String fullPath = entry.getValue() != null + ? FSUtils.getPartitionPath(basePath, entry.getValue()).toString() + : null; fullPaths.put(entry.getKey(), fullPath); } return fullPaths; } + public List getFullPathsByPartitionPath(String basePath, String partitionPath) { + HashSet fullPaths = new HashSet<>(); + if (getPartitionToWriteStats().get(partitionPath) != null) { + for (HoodieWriteStat stat : getPartitionToWriteStats().get(partitionPath)) { + if ((stat.getFileId() != null)) { + String fullPath = FSUtils.getPartitionPath(basePath, stat.getPath()).toString(); + fullPaths.add(fullPath); + } + } + } + return new ArrayList<>(fullPaths); + } + public Map getFileGroupIdAndFullPaths(String basePath) { Map fileGroupIdToFullPaths = new HashMap<>(); for (Map.Entry> entry : getPartitionToWriteStats().entrySet()) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDeltaWriteStat.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDeltaWriteStat.java index cf3bb52263366..9626e218a2247 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDeltaWriteStat.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDeltaWriteStat.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import org.apache.hudi.common.util.Option; -import java.io.Serializable; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -30,13 +29,14 @@ * Statistics about a single Hoodie delta log operation. 
*/ @JsonIgnoreProperties(ignoreUnknown = true) +@SuppressWarnings("rawtypes") public class HoodieDeltaWriteStat extends HoodieWriteStat { private int logVersion; private long logOffset; private String baseFile; private List logFiles = new ArrayList<>(); - private Option> recordsStats = Option.empty(); + private Option>> recordsStats = Option.empty(); public void setLogVersion(int logVersion) { this.logVersion = logVersion; @@ -74,23 +74,11 @@ public List getLogFiles() { return logFiles; } - public void setRecordsStats(RecordsStats stats) { + public void setRecordsStats(Map> stats) { recordsStats = Option.of(stats); } - public Option> getRecordsStats() { + public Option>> getColumnStats() { return recordsStats; } - - public static class RecordsStats implements Serializable { - private final T recordsStats; - - public RecordsStats(T recordsStats) { - this.recordsStats = recordsStats; - } - - public T getStats() { - return recordsStats; - } - } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java index faad46653ad25..93e9ea5d3433a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java @@ -18,26 +18,47 @@ package org.apache.hudi.common.model; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.avro.HoodieAvroWriteSupport; +import org.apache.hudi.common.util.AvroOrcUtils; +import org.apache.hudi.common.util.BaseFileUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.avro.Schema; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import org.apache.orc.OrcFile; +import org.apache.orc.Writer; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; +import org.apache.parquet.schema.Types; import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; import java.util.Properties; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * The metadata that goes into the meta file in each partition. */ public class HoodiePartitionMetadata { - public static final String HOODIE_PARTITION_METAFILE = ".hoodie_partition_metadata"; - public static final String PARTITION_DEPTH_KEY = "partitionDepth"; + public static final String HOODIE_PARTITION_METAFILE_PREFIX = ".hoodie_partition_metadata"; public static final String COMMIT_TIME_KEY = "commitTime"; + private static final String PARTITION_DEPTH_KEY = "partitionDepth"; + private static final Logger LOG = LogManager.getLogger(HoodiePartitionMetadata.class); /** * Contents of the metadata. @@ -51,7 +72,8 @@ public class HoodiePartitionMetadata { private final FileSystem fs; - private static final Logger LOG = LogManager.getLogger(HoodiePartitionMetadata.class); + // The format in which to write the partition metadata + private Option format; /** * Construct metadata from existing partition. @@ -60,13 +82,15 @@ public HoodiePartitionMetadata(FileSystem fs, Path partitionPath) { this.fs = fs; this.props = new Properties(); this.partitionPath = partitionPath; + this.format = Option.empty(); } /** * Construct metadata object to be written out. 
*/ - public HoodiePartitionMetadata(FileSystem fs, String instantTime, Path basePath, Path partitionPath) { + public HoodiePartitionMetadata(FileSystem fs, String instantTime, Path basePath, Path partitionPath, Option format) { this(fs, partitionPath); + this.format = format; props.setProperty(COMMIT_TIME_KEY, instantTime); props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth())); } @@ -82,21 +106,17 @@ public int getPartitionDepth() { * Write the metadata safely into partition atomically. */ public void trySave(int taskPartitionId) { + String extension = getMetafileExtension(); Path tmpMetaPath = - new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId); - Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE); + new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX + "_" + taskPartitionId + extension); + Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX + extension); boolean metafileExists = false; try { metafileExists = fs.exists(metaPath); if (!metafileExists) { // write to temporary file - FSDataOutputStream os = fs.create(tmpMetaPath, true); - props.store(os, "partition metadata"); - os.hsync(); - os.hflush(); - os.close(); - + writeMetafile(tmpMetaPath); // move to actual path fs.rename(tmpMetaPath, metaPath); } @@ -117,30 +137,172 @@ public void trySave(int taskPartitionId) { } } + private String getMetafileExtension() { + // To be backwards compatible, there is no extension to the properties file base partition metafile + return format.isPresent() ? format.get().getFileExtension() : StringUtils.EMPTY_STRING; + } + + /** + * Write the partition metadata in the correct format in the given file path. 
+ * + * @param filePath Path of the file to write + * @throws IOException + */ + private void writeMetafile(Path filePath) throws IOException { + if (format.isPresent()) { + Schema schema = HoodieAvroUtils.getRecordKeySchema(); + + switch (format.get()) { + case PARQUET: + // Since we are only interested in saving metadata to the footer, the schema, blocksizes and other + // parameters are not important. + MessageType type = Types.buildMessage().optional(PrimitiveTypeName.INT64).named("dummyint").named("dummy"); + HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(type, schema, Option.empty()); + try (ParquetWriter writer = new ParquetWriter(filePath, writeSupport, CompressionCodecName.UNCOMPRESSED, 1024, 1024)) { + for (String key : props.stringPropertyNames()) { + writeSupport.addFooterMetadata(key, props.getProperty(key)); + } + } + break; + case ORC: + // Since we are only interested in saving metadata to the footer, the schema, blocksizes and other + // parameters are not important. + OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(fs.getConf()).fileSystem(fs) + .setSchema(AvroOrcUtils.createOrcSchema(schema)); + try (Writer writer = OrcFile.createWriter(filePath, writerOptions)) { + for (String key : props.stringPropertyNames()) { + writer.addUserMetadata(key, ByteBuffer.wrap(props.getProperty(key).getBytes())); + } + } + break; + default: + throw new HoodieException("Unsupported format for partition metafiles: " + format.get()); + } + } else { + // Backwards compatible properties file format + FSDataOutputStream os = fs.create(filePath, true); + props.store(os, "partition metadata"); + os.hsync(); + os.hflush(); + os.close(); + } + } + /** * Read out the metadata for this partition. 
*/ public void readFromFS() throws IOException { - FSDataInputStream is = null; - try { - Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE); - is = fs.open(metaFile); + // first try reading the text format (legacy, currently widespread) + boolean readFile = readTextFormatMetaFile(); + if (!readFile) { + // now try reading the base file formats. + readFile = readBaseFormatMetaFile(); + } + + // throw exception. + if (!readFile) { + throw new HoodieException("Unable to read any partition meta file to locate the table timeline."); + } + } + + private boolean readTextFormatMetaFile() { + // Properties file format + Path metafilePath = textFormatMetaFilePath(partitionPath); + try (FSDataInputStream is = fs.open(metafilePath)) { props.load(is); - } catch (IOException ioe) { - throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath, ioe); - } finally { - if (is != null) { - is.close(); + format = Option.empty(); + return true; + } catch (Throwable t) { + LOG.warn("Unable to read partition meta properties file for partition " + partitionPath, t); + return false; + } + } + + private boolean readBaseFormatMetaFile() { + for (Path metafilePath : baseFormatMetaFilePaths(partitionPath)) { + try { + BaseFileUtils reader = BaseFileUtils.getInstance(metafilePath.toString()); + // Data file format + Map metadata = reader.readFooter(fs.getConf(), true, metafilePath, PARTITION_DEPTH_KEY, COMMIT_TIME_KEY); + props.clear(); + props.putAll(metadata); + format = Option.of(reader.getFormat()); + return true; + } catch (Throwable t) { + // any error, log, check the next base format + LOG.warn("Unable to read partition metadata " + metafilePath.getName() + " for partition " + partitionPath, t); } } + return false; + } + + /** + * Read out the COMMIT_TIME_KEY metadata for this partition. 
+ */ + public Option readPartitionCreatedCommitTime() { + try { + if (!props.containsKey(COMMIT_TIME_KEY)) { + readFromFS(); + } + return Option.of(props.getProperty(COMMIT_TIME_KEY)); + } catch (IOException ioe) { + LOG.warn("Error fetch Hoodie partition metadata for " + partitionPath, ioe); + return Option.empty(); + } } // methods related to partition meta data public static boolean hasPartitionMetadata(FileSystem fs, Path partitionPath) { try { - return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)); + return textFormatMetaPathIfExists(fs, partitionPath).isPresent() + || baseFormatMetaPathIfExists(fs, partitionPath).isPresent(); + } catch (IOException ioe) { + throw new HoodieIOException("Error checking presence of partition meta file for " + partitionPath, ioe); + } + } + + /** + * Returns the name of the partition metadata. + * + * @return Name of the partition metafile or empty option + */ + public static Option getPartitionMetafilePath(FileSystem fs, Path partitionPath) { + // The partition listing is a costly operation so instead we are searching for existence of the files instead. + // This is in expected order as properties file based partition metafiles should be the most common. 
+ try { + Option textFormatPath = textFormatMetaPathIfExists(fs, partitionPath); + if (textFormatPath.isPresent()) { + return textFormatPath; + } else { + return baseFormatMetaPathIfExists(fs, partitionPath); + } } catch (IOException ioe) { throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath, ioe); } } + + public static Option baseFormatMetaPathIfExists(FileSystem fs, Path partitionPath) throws IOException { + // Parquet should be more common than ORC so check it first + for (Path metafilePath : baseFormatMetaFilePaths(partitionPath)) { + if (fs.exists(metafilePath)) { + return Option.of(metafilePath); + } + } + return Option.empty(); + } + + public static Option textFormatMetaPathIfExists(FileSystem fs, Path partitionPath) throws IOException { + Path path = textFormatMetaFilePath(partitionPath); + return Option.ofNullable(fs.exists(path) ? path : null); + } + + static Path textFormatMetaFilePath(Path partitionPath) { + return new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX); + } + + static List baseFormatMetaFilePaths(Path partitionPath) { + return Stream.of(HoodieFileFormat.PARQUET.getFileExtension(), HoodieFileFormat.ORC.getFileExtension()) + .map(ext -> new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX + ext)) + .collect(Collectors.toList()); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java index 36dd30b659dbf..6752607d2f48c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java @@ -114,4 +114,16 @@ default Option getInsertValue(Schema schema, Properties propertie default Option> getMetadata() { return Option.empty(); } + + /** + * This method can be used to extract the ordering value of the payload for 
combining/merging, + * or 0 if no value is specified which means natural order(arrival time is used). + * + * @return the ordering value + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE) + default Comparable getOrderingValue() { + // default natural order + return 0; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java index 7b7bd6c6b2e5e..d8469ed5a148e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java @@ -105,4 +105,9 @@ public Boolean overwriteField(Object value, Object defaultValue) { } return Objects.equals(value, defaultValue); } + + @Override + public Comparable getOrderingValue() { + return this.orderingVal; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java b/hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java index b5a3cc002366e..f2f3809cf5c3a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java @@ -48,6 +48,12 @@ public enum WriteOperationType { INSERT_OVERWRITE_TABLE("insert_overwrite_table"), // compact COMPACT("compact"), + + INDEX("index"), + + // alter schema + ALTER_SCHEMA("alter_schema"), + // used for old version UNKNOWN("unknown"); @@ -86,6 +92,10 @@ public static WriteOperationType fromValue(String value) { return CLUSTER; case "compact": return COMPACT; + case "index": + return INDEX; + case "alter_schema": + return ALTER_SCHEMA; case "unknown": return UNKNOWN; default: diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java index 
6b8103db3a663..254044bd28371 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java @@ -36,6 +36,7 @@ import org.apache.hudi.common.util.BinaryUtil; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; @@ -189,6 +190,17 @@ public class HoodieTableConfig extends HoodieConfig { .defaultValue(HoodieTimelineTimeZone.LOCAL) .withDocumentation("User can set hoodie commit timeline timezone, such as utc, local and so on. local is default"); + public static final ConfigProperty PARTITION_METAFILE_USE_BASE_FORMAT = ConfigProperty + .key("hoodie.partition.metafile.use.base.format") + .defaultValue(false) + .withDocumentation("If true, partition metafiles are saved in the same format as basefiles for this dataset (e.g. Parquet / ORC). " + + "If false (default) partition metafiles are saved as properties files."); + + public static final ConfigProperty DROP_PARTITION_COLUMNS = ConfigProperty + .key("hoodie.datasource.write.drop.partition.columns") + .defaultValue(false) + .withDocumentation("When set to true, will not write the partition columns into hudi. By default, false."); + public static final ConfigProperty URL_ENCODE_PARTITIONING = KeyGeneratorOptions.URL_ENCODE_PARTITIONING; public static final ConfigProperty HIVE_STYLE_PARTITIONING_ENABLE = KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE; @@ -208,6 +220,20 @@ public class HoodieTableConfig extends HoodieConfig { .sinceVersion("0.11.0") .withDocumentation("Table checksum is used to guard against partial writes in HDFS. 
It is added as the last entry in hoodie.properties and then used to validate while reading table config."); + public static final ConfigProperty TABLE_METADATA_PARTITIONS_INFLIGHT = ConfigProperty + .key("hoodie.table.metadata.partitions.inflight") + .noDefaultValue() + .sinceVersion("0.11.0") + .withDocumentation("Comma-separated list of metadata partitions whose building is in progress. " + + "These partitions are not yet ready for use by the readers."); + + public static final ConfigProperty TABLE_METADATA_PARTITIONS = ConfigProperty + .key("hoodie.table.metadata.partitions") + .noDefaultValue() + .sinceVersion("0.11.0") + .withDocumentation("Comma-separated list of metadata partitions that have been completely built and in-sync with data table. " + + "These partitions are ready for use by the readers"); + private static final String TABLE_CHECKSUM_FORMAT = "%s.%s"; // . public HoodieTableConfig(FileSystem fs, String metaPath, String payloadClassName) { @@ -405,6 +431,9 @@ public static void create(FileSystem fs, Path metadataFolder, Properties propert if (hoodieConfig.contains(TIMELINE_TIMEZONE)) { HoodieInstantTimeGenerator.setCommitTimeZone(HoodieTimelineTimeZone.valueOf(hoodieConfig.getString(TIMELINE_TIMEZONE))); } + + hoodieConfig.setDefaultValue(DROP_PARTITION_COLUMNS); + storeProperties(hoodieConfig.getProps(), outputStream); } } @@ -578,6 +607,10 @@ public String getUrlEncodePartitioning() { return getString(URL_ENCODE_PARTITIONING); } + public Boolean isDropPartitionColumns() { + return getBooleanOrDefault(DROP_PARTITION_COLUMNS); + } + /** * Read the table checksum. 
*/ @@ -585,6 +618,30 @@ private Long getTableChecksum() { return getLong(TABLE_CHECKSUM); } + public List getMetadataPartitionsInflight() { + return StringUtils.split( + getStringOrDefault(TABLE_METADATA_PARTITIONS_INFLIGHT, StringUtils.EMPTY_STRING), + CONFIG_VALUES_DELIMITER + ); + } + + public List getMetadataPartitions() { + return StringUtils.split( + getStringOrDefault(TABLE_METADATA_PARTITIONS, StringUtils.EMPTY_STRING), + CONFIG_VALUES_DELIMITER + ); + } + + /** + * Returns the format to use for partition meta files. + */ + public Option getPartitionMetafileFormat() { + if (getBooleanOrDefault(PARTITION_METAFILE_USE_BASE_FORMAT)) { + return Option.of(getBaseFileFormat()); + } + return Option.empty(); + } + public Map propsMap() { return props.entrySet().stream() .collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue()))); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java index 3de93005da66d..38b5509cd577f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java @@ -90,6 +90,8 @@ public class HoodieTableMetaClient implements Serializable { public static final String BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH = BOOTSTRAP_INDEX_ROOT_FOLDER_PATH + Path.SEPARATOR + ".fileids"; + public static final String SCHEMA_FOLDER_NAME = ".schema"; + public static final String MARKER_EXTN = ".marker"; private String basePath; @@ -192,6 +194,13 @@ public String getColumnStatsIndexPath() { return new Path(metaPath, COLUMN_STATISTICS_INDEX_NAME).toString(); } + /** + * @return schema folder path + */ + public String getSchemaFolderName() { + return new Path(metaPath, SCHEMA_FOLDER_NAME).toString(); + } + /** * @return Temp Folder path */ @@ -392,6 +401,11 @@ public static HoodieTableMetaClient 
initTableAndGetMetaClient(Configuration hado if (!fs.exists(metaPathDir)) { fs.mkdirs(metaPathDir); } + // create schema folder + Path schemaPathDir = new Path(metaPathDir, SCHEMA_FOLDER_NAME); + if (!fs.exists(schemaPathDir)) { + fs.mkdirs(schemaPathDir); + } // if anything other than default archive log folder is specified, create that too String archiveLogPropVal = new HoodieConfig(props).getStringOrDefault(HoodieTableConfig.ARCHIVELOG_FOLDER); @@ -685,6 +699,8 @@ public static class PropertyBuilder { private Boolean hiveStylePartitioningEnable; private Boolean urlEncodePartitioning; private HoodieTimelineTimeZone commitTimeZone; + private Boolean partitionMetafileUseBaseFormat; + private Boolean dropPartitionColumnsWhenWrite; /** * Persist the configs that is written at the first time, and should not be changed. @@ -799,6 +815,16 @@ public PropertyBuilder setCommitTimezone(HoodieTimelineTimeZone timelineTimeZone return this; } + public PropertyBuilder setPartitionMetafileUseBaseFormat(Boolean useBaseFormat) { + this.partitionMetafileUseBaseFormat = useBaseFormat; + return this; + } + + public PropertyBuilder setDropPartitionColumnsWhenWrite(Boolean dropPartitionColumnsWhenWrite) { + this.dropPartitionColumnsWhenWrite = dropPartitionColumnsWhenWrite; + return this; + } + public PropertyBuilder set(String key, Object value) { if (HoodieTableConfig.PERSISTED_CONFIG_LIST.contains(key)) { this.others.put(key, value); @@ -894,6 +920,13 @@ public PropertyBuilder fromProperties(Properties properties) { if (hoodieConfig.contains(HoodieTableConfig.URL_ENCODE_PARTITIONING)) { setUrlEncodePartitioning(hoodieConfig.getBoolean(HoodieTableConfig.URL_ENCODE_PARTITIONING)); } + if (hoodieConfig.contains(HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT)) { + setPartitionMetafileUseBaseFormat(hoodieConfig.getBoolean(HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT)); + } + + if (hoodieConfig.contains(HoodieTableConfig.DROP_PARTITION_COLUMNS)) { + 
setDropPartitionColumnsWhenWrite(hoodieConfig.getBoolean(HoodieTableConfig.DROP_PARTITION_COLUMNS)); + } return this; } @@ -972,6 +1005,13 @@ public Properties build() { if (null != commitTimeZone) { tableConfig.setValue(HoodieTableConfig.TIMELINE_TIMEZONE, commitTimeZone.toString()); } + if (null != partitionMetafileUseBaseFormat) { + tableConfig.setValue(HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT, partitionMetafileUseBaseFormat.toString()); + } + + if (null != dropPartitionColumnsWhenWrite) { + tableConfig.setValue(HoodieTableConfig.DROP_PARTITION_COLUMNS, Boolean.toString(dropPartitionColumnsWhenWrite)); + } return tableConfig.getProps(); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java index 2c107694a1b28..262157a8aefa7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java @@ -18,13 +18,16 @@ package org.apache.hudi.common.table; +import org.apache.avro.JsonProperties; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.SchemaCompatibility; import org.apache.avro.generic.IndexedRecord; + import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.hfile.CacheConfig; + import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFileFormat; @@ -42,12 +45,17 @@ import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIncompatibleSchemaException; import org.apache.hudi.exception.InvalidTableException; import org.apache.hudi.io.storage.HoodieHFileReader; - import org.apache.hudi.io.storage.HoodieOrcReader; 
+import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager; +import org.apache.hudi.internal.schema.utils.SerDeHelper; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; + import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.format.converter.ParquetMetadataConverter; import org.apache.parquet.hadoop.ParquetFileReader; @@ -55,6 +63,9 @@ import org.apache.parquet.schema.MessageType; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * Helper class to read schema from data files and log files and to convert it between different formats. @@ -156,23 +167,67 @@ public Schema getTableAvroSchema() throws Exception { * @throws Exception */ public Schema getTableAvroSchema(boolean includeMetadataFields) throws Exception { + Schema schema; Option schemaFromCommitMetadata = getTableSchemaFromCommitMetadata(includeMetadataFields); if (schemaFromCommitMetadata.isPresent()) { - return schemaFromCommitMetadata.get(); - } - Option schemaFromTableConfig = metaClient.getTableConfig().getTableCreateSchema(); - if (schemaFromTableConfig.isPresent()) { - if (includeMetadataFields) { - return HoodieAvroUtils.addMetadataFields(schemaFromTableConfig.get(), hasOperationField); + schema = schemaFromCommitMetadata.get(); + } else { + Option schemaFromTableConfig = metaClient.getTableConfig().getTableCreateSchema(); + if (schemaFromTableConfig.isPresent()) { + if (includeMetadataFields) { + schema = HoodieAvroUtils.addMetadataFields(schemaFromTableConfig.get(), hasOperationField); + } else { + schema = schemaFromTableConfig.get(); + } } else { - return schemaFromTableConfig.get(); + if (includeMetadataFields) { + schema = getTableAvroSchemaFromDataFile(); + } else { + schema = HoodieAvroUtils.removeMetadataFields(getTableAvroSchemaFromDataFile()); + } } } - if (includeMetadataFields) { - return 
getTableAvroSchemaFromDataFile(); - } else { - return HoodieAvroUtils.removeMetadataFields(getTableAvroSchemaFromDataFile()); + + Option partitionFieldsOpt = metaClient.getTableConfig().getPartitionFields(); + if (metaClient.getTableConfig().isDropPartitionColumns()) { + schema = recreateSchemaWhenDropPartitionColumns(partitionFieldsOpt, schema); + } + return schema; + } + + public static Schema recreateSchemaWhenDropPartitionColumns(Option partitionFieldsOpt, Schema originSchema) { + // when hoodie.datasource.write.drop.partition.columns is true, partition columns can't be persisted in data files. + // And there are no partition schema if the schema is parsed from data files. + // Here we create partition Fields for this case, and use StringType as the data type. + Schema schema = originSchema; + if (partitionFieldsOpt.isPresent() && partitionFieldsOpt.get().length != 0) { + List partitionFields = Arrays.asList(partitionFieldsOpt.get()); + + final Schema schema0 = originSchema; + boolean hasPartitionColNotInSchema = partitionFields.stream().anyMatch( + pt -> !HoodieAvroUtils.containsFieldInSchema(schema0, pt) + ); + boolean hasPartitionColInSchema = partitionFields.stream().anyMatch( + pt -> HoodieAvroUtils.containsFieldInSchema(schema0, pt) + ); + if (hasPartitionColNotInSchema && hasPartitionColInSchema) { + throw new HoodieIncompatibleSchemaException( + "Not support: Partial partition fields are still in the schema " + + "when enable hoodie.datasource.write.drop.partition.columns"); + } + + if (hasPartitionColNotInSchema) { + // when hasPartitionColNotInSchema is true and hasPartitionColInSchema is false, all partition columns + // are not in originSchema. So we create and add them. 
+ List newFields = new ArrayList<>(); + for (String partitionField: partitionFields) { + newFields.add(new Schema.Field( + partitionField, Schema.create(Schema.Type.STRING), "", JsonProperties.NULL_VALUE)); + } + schema = HoodieAvroUtils.createNewSchemaWithExtraFields(schema, newFields); + } } + return schema; } /** @@ -418,25 +473,6 @@ public Schema getLatestSchema(Schema writeSchema, boolean convertTableSchemaToAd return latestSchema; } - - /** - * Get Last commit's Metadata. - */ - public Option getLatestCommitMetadata() { - try { - HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); - if (timeline.lastInstant().isPresent()) { - HoodieInstant instant = timeline.lastInstant().get(); - byte[] data = timeline.getInstantDetails(instant).get(); - return Option.of(HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class)); - } else { - return Option.empty(); - } - } catch (Exception e) { - throw new HoodieException("Failed to get commit metadata", e); - } - } - /** * Read the parquet schema from a parquet File. */ @@ -534,4 +570,51 @@ private boolean hasOperationField() { return false; } } + + /** + * Gets the InternalSchema for a hoodie table from the HoodieCommitMetadata of the instant. + * + * @return InternalSchema for this table + */ + public Option getTableInternalSchemaFromCommitMetadata() { + HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); + if (timeline.lastInstant().isPresent()) { + return getTableInternalSchemaFromCommitMetadata(timeline.lastInstant().get()); + } else { + return Option.empty(); + } + } + + /** + * Gets the InternalSchema for a hoodie table from the HoodieCommitMetadata of the instant. 
+ * + * @return InternalSchema for this table + */ + private Option getTableInternalSchemaFromCommitMetadata(HoodieInstant instant) { + try { + HoodieTimeline timeline = metaClient.getActiveTimeline().filterCompletedInstants(); + byte[] data = timeline.getInstantDetails(instant).get(); + HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class); + String latestInternalSchemaStr = metadata.getMetadata(SerDeHelper.LATEST_SCHEMA); + if (latestInternalSchemaStr != null) { + return SerDeHelper.fromJson(latestInternalSchemaStr); + } else { + return Option.empty(); + } + } catch (Exception e) { + throw new HoodieException("Failed to read schema from commit metadata", e); + } + } + + /** + * Gets the history schemas as String for a hoodie table from the HoodieCommitMetadata of the instant. + * + * @return history schemas string for this table + */ + public Option getTableHistorySchemaStrFromCommitMetadata() { + // now we only support FileBaseInternalSchemaManager + FileBasedInternalSchemaStorageManager manager = new FileBasedInternalSchemaStorageManager(metaClient); + String result = manager.getHistorySchemaStr(); + return result.isEmpty() ? 
Option.empty() : Option.of(result); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java index fa5117e41fa76..9e56083b262e0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java @@ -18,8 +18,9 @@ package org.apache.hudi.common.table.log; +import org.apache.hudi.common.model.DeleteRecord; +import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.model.HoodieAvroRecord; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -36,6 +37,7 @@ import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.SpillableMapUtils; +import org.apache.hudi.common.util.InternalSchemaCache; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; @@ -46,13 +48,15 @@ import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.action.InternalSchemaMerger; +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.IOException; import java.util.ArrayDeque; import java.util.Arrays; -import java.util.Collections; import java.util.Deque; import java.util.HashSet; import java.util.List; @@ -109,6 +113,10 @@ public abstract class AbstractHoodieLogRecordReader { private final FileSystem fs; // Total log 
files read - for metrics private AtomicLong totalLogFiles = new AtomicLong(0); + // Internal schema, used to support full schema evolution. + private InternalSchema internalSchema; + // Hoodie table path. + private final String path; // Total log blocks read - for metrics private AtomicLong totalLogBlocks = new AtomicLong(0); // Total log records read - for metrics @@ -120,7 +128,7 @@ public abstract class AbstractHoodieLogRecordReader { // Store the last instant log blocks (needed to implement rollback) private Deque currentInstantLogBlocks = new ArrayDeque<>(); // Enables full scan of log records - protected final boolean enableFullScan; + protected final boolean forceFullScan; private int totalScannedLogFiles; // Progress private float progress = 0.0f; @@ -135,14 +143,14 @@ protected AbstractHoodieLogRecordReader(FileSystem fs, String basePath, List instantRange, boolean withOperationField) { this(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize, - instantRange, withOperationField, true, Option.empty()); + instantRange, withOperationField, true, Option.empty(), InternalSchema.getEmptyInternalSchema()); } protected AbstractHoodieLogRecordReader(FileSystem fs, String basePath, List logFilePaths, Schema readerSchema, String latestInstantTime, boolean readBlocksLazily, boolean reverseReader, int bufferSize, Option instantRange, - boolean withOperationField, boolean enableFullScan, - Option partitionName) { + boolean withOperationField, boolean forceFullScan, + Option partitionName, InternalSchema internalSchema) { this.readerSchema = readerSchema; this.latestInstantTime = latestInstantTime; this.hoodieTableMetaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).build(); @@ -158,7 +166,9 @@ protected AbstractHoodieLogRecordReader(FileSystem fs, String basePath, List> keys) { + public synchronized void scan(List keys) { + scanInternal(Option.of(new KeySpec(keys, true))); + } + + 
protected synchronized void scanInternal(Option keySpecOpt) { currentInstantLogBlocks = new ArrayDeque<>(); progress = 0.0f; totalLogFiles = new AtomicLong(0); @@ -194,15 +208,16 @@ public synchronized void scan(Option> keys) { HoodieTimeline completedInstantsTimeline = commitsTimeline.filterCompletedInstants(); HoodieTimeline inflightInstantsTimeline = commitsTimeline.filterInflights(); try { - // Get the key field based on populate meta fields config // and the table type final String keyField = getKeyField(); // Iterate over the paths + boolean enableRecordLookups = !forceFullScan; logFormatReaderWrapper = new HoodieLogFormatReader(fs, logFilePaths.stream().map(logFile -> new HoodieLogFile(new Path(logFile))).collect(Collectors.toList()), - readerSchema, readBlocksLazily, reverseReader, bufferSize, !enableFullScan, keyField); + readerSchema, readBlocksLazily, reverseReader, bufferSize, enableRecordLookups, keyField, internalSchema); + Set scannedLogFiles = new HashSet<>(); while (logFormatReaderWrapper.hasNext()) { HoodieLogFile logFile = logFormatReaderWrapper.getLogFile(); @@ -239,7 +254,7 @@ public synchronized void scan(Option> keys) { if (isNewInstantBlock(logBlock) && !readBlocksLazily) { // If this is an avro data block belonging to a different commit/instant, // then merge the last blocks and records into the main result - processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size(), keys); + processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size(), keySpecOpt); } // store the current block currentInstantLogBlocks.push(logBlock); @@ -249,7 +264,7 @@ public synchronized void scan(Option> keys) { if (isNewInstantBlock(logBlock) && !readBlocksLazily) { // If this is a delete data block belonging to a different commit/instant, // then merge the last blocks and records into the main result - processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size(), keys); + 
processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size(), keySpecOpt); } // store deletes so can be rolled back currentInstantLogBlocks.push(logBlock); @@ -324,7 +339,7 @@ public synchronized void scan(Option> keys) { // merge the last read block when all the blocks are done reading if (!currentInstantLogBlocks.isEmpty()) { LOG.info("Merging the final data blocks"); - processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size(), keys); + processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size(), keySpecOpt); } // Done progress = 1.0f; @@ -359,10 +374,12 @@ private boolean isNewInstantBlock(HoodieLogBlock logBlock) { * Iterate over the GenericRecord in the block, read the hoodie key and partition path and call subclass processors to * handle it. */ - private void processDataBlock(HoodieDataBlock dataBlock, Option> keys) throws Exception { - try (ClosableIterator recordItr = dataBlock.getRecordItr(keys.orElse(Collections.emptyList()))) { - while (recordItr.hasNext()) { - IndexedRecord record = recordItr.next(); + private void processDataBlock(HoodieDataBlock dataBlock, Option keySpecOpt) throws Exception { + try (ClosableIterator recordIterator = getRecordsIterator(dataBlock, keySpecOpt)) { + Option schemaOption = getMergedSchema(dataBlock); + while (recordIterator.hasNext()) { + IndexedRecord currentRecord = recordIterator.next(); + IndexedRecord record = schemaOption.isPresent() ? HoodieAvroUtils.rewriteRecordWithNewSchema(currentRecord, schemaOption.get()) : currentRecord; processNextRecord(createHoodieRecord(record, this.hoodieTableMetaClient.getTableConfig(), this.payloadClassFQN, this.preCombineField, this.withOperationField, this.simpleKeyGenFields, this.partitionName)); totalLogRecords.incrementAndGet(); @@ -370,6 +387,28 @@ private void processDataBlock(HoodieDataBlock dataBlock, Option> ke } } + /** + * Get final Read Schema for support evolution. + * step1: find the fileSchema for current dataBlock. 
+ * step2: determine whether fileSchema is compatible with the final read internalSchema. + * step3: merge fileSchema and read internalSchema to produce final read schema. + * + * @param dataBlock current processed block + * @return final read schema. + */ + private Option getMergedSchema(HoodieDataBlock dataBlock) { + Option result = Option.empty(); + if (!internalSchema.isEmptySchema()) { + Long currentInstantTime = Long.parseLong(dataBlock.getLogBlockHeader().get(INSTANT_TIME)); + InternalSchema fileSchema = InternalSchemaCache + .searchSchemaAndCache(currentInstantTime, hoodieTableMetaClient, false); + Schema mergeSchema = AvroInternalSchemaConverter + .convert(new InternalSchemaMerger(fileSchema, internalSchema, true, false).mergeSchema(), readerSchema.getName()); + result = Option.of(mergeSchema); + } + return result; + } + /** * Create @{@link HoodieRecord} from the @{@link IndexedRecord}. * @@ -404,33 +443,33 @@ protected HoodieAvroRecord createHoodieRecord(final IndexedRecord rec, final protected abstract void processNextRecord(HoodieRecord hoodieRecord) throws Exception; /** - * Process next deleted key. + * Process next deleted record. * - * @param key Deleted record key + * @param deleteRecord Deleted record(hoodie key and ordering value) */ - protected abstract void processNextDeletedKey(HoodieKey key); + protected abstract void processNextDeletedRecord(DeleteRecord deleteRecord); /** * Process the set of log blocks belonging to the last instant which is read fully. 
*/ private void processQueuedBlocksForInstant(Deque logBlocks, int numLogFilesSeen, - Option> keys) throws Exception { + Option keySpecOpt) throws Exception { while (!logBlocks.isEmpty()) { LOG.info("Number of remaining logblocks to merge " + logBlocks.size()); // poll the element at the bottom of the stack since that's the order it was inserted HoodieLogBlock lastBlock = logBlocks.pollLast(); switch (lastBlock.getBlockType()) { case AVRO_DATA_BLOCK: - processDataBlock((HoodieAvroDataBlock) lastBlock, keys); + processDataBlock((HoodieAvroDataBlock) lastBlock, keySpecOpt); break; case HFILE_DATA_BLOCK: - processDataBlock((HoodieHFileDataBlock) lastBlock, keys); + processDataBlock((HoodieHFileDataBlock) lastBlock, keySpecOpt); break; case PARQUET_DATA_BLOCK: - processDataBlock((HoodieParquetDataBlock) lastBlock, keys); + processDataBlock((HoodieParquetDataBlock) lastBlock, keySpecOpt); break; case DELETE_BLOCK: - Arrays.stream(((HoodieDeleteBlock) lastBlock).getKeysToDelete()).forEach(this::processNextDeletedKey); + Arrays.stream(((HoodieDeleteBlock) lastBlock).getRecordsToDelete()).forEach(this::processNextDeletedRecord); break; case CORRUPT_BLOCK: LOG.warn("Found a corrupt block which was not rolled back"); @@ -443,6 +482,15 @@ private void processQueuedBlocksForInstant(Deque logBlocks, int progress = numLogFilesSeen - 1 / logFilePaths.size(); } + private ClosableIterator getRecordsIterator(HoodieDataBlock dataBlock, Option keySpecOpt) throws IOException { + if (keySpecOpt.isPresent()) { + KeySpec keySpec = keySpecOpt.get(); + return dataBlock.getRecordIterator(keySpec.keys, keySpec.fullKey); + } + + return dataBlock.getRecordIterator(); + } + /** * Return progress of scanning as a float between 0.0 to 1.0. 
*/ @@ -466,7 +514,7 @@ protected String getPayloadClassFQN() { return payloadClassFQN; } - protected Option getPartitionName() { + public Option getPartitionName() { return partitionName; } @@ -482,6 +530,16 @@ public boolean isWithOperationField() { return withOperationField; } + protected static class KeySpec { + private final List keys; + private final boolean fullKey; + + public KeySpec(List keys, boolean fullKey) { + this.keys = keys; + this.fullKey = fullKey; + } + } + /** * Builder used to build {@code AbstractHoodieLogRecordScanner}. */ diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java index 07cb36bb169bb..af9bcd27f933c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java @@ -36,6 +36,7 @@ import org.apache.hudi.exception.CorruptedLogFileException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieNotSupportedException; +import org.apache.hudi.internal.schema.InternalSchema; import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; @@ -44,6 +45,7 @@ import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hbase.util.Bytes; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -74,6 +76,7 @@ public class HoodieLogFileReader implements HoodieLogFormat.Reader { private final HoodieLogFile logFile; private final byte[] magicBuffer = new byte[6]; private final Schema readerSchema; + private InternalSchema internalSchema = InternalSchema.getEmptyInternalSchema(); private final String keyField; private boolean readBlockLazily; private long reverseLogFilePosition; @@ -97,6 +100,12 @@ public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSc public 
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize, boolean readBlockLazily, boolean reverseReader, boolean enableRecordLookups, String keyField) throws IOException { + this(fs, logFile, readerSchema, bufferSize, readBlockLazily, reverseReader, enableRecordLookups, keyField, InternalSchema.getEmptyInternalSchema()); + } + + public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize, + boolean readBlockLazily, boolean reverseReader, boolean enableRecordLookups, + String keyField, InternalSchema internalSchema) throws IOException { this.hadoopConf = fs.getConf(); // NOTE: We repackage {@code HoodieLogFile} here to make sure that the provided path // is prefixed with an appropriate scheme given that we're not propagating the FS @@ -108,6 +117,7 @@ public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSc this.reverseReader = reverseReader; this.enableRecordLookups = enableRecordLookups; this.keyField = keyField; + this.internalSchema = internalSchema == null ? 
InternalSchema.getEmptyInternalSchema() : internalSchema; if (this.reverseReader) { this.reverseLogFilePosition = this.lastReverseLogFilePosition = this.logFile.getFileSize(); } @@ -197,10 +207,10 @@ private HoodieLogBlock readBlock() throws IOException { switch (Objects.requireNonNull(blockType)) { case AVRO_DATA_BLOCK: if (nextBlockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) { - return HoodieAvroDataBlock.getBlock(content.get(), readerSchema); + return HoodieAvroDataBlock.getBlock(content.get(), readerSchema, internalSchema); } else { return new HoodieAvroDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, - Option.ofNullable(readerSchema), header, footer, keyField); + Option.ofNullable(readerSchema), header, footer, keyField, internalSchema); } case HFILE_DATA_BLOCK: @@ -208,7 +218,7 @@ private HoodieLogBlock readBlock() throws IOException { String.format("HFile block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION)); return new HoodieHFileDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, - Option.ofNullable(readerSchema), header, footer, enableRecordLookups); + Option.ofNullable(readerSchema), header, footer, enableRecordLookups, logFile.getPath()); case PARQUET_DATA_BLOCK: checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatReader.java index d4a173d069c79..c48107e392515 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatReader.java @@ -24,6 +24,7 @@ import org.apache.avro.Schema; import org.apache.hadoop.fs.FileSystem; +import org.apache.hudi.internal.schema.InternalSchema; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -42,6 +43,7 
@@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { private HoodieLogFileReader currentReader; private final FileSystem fs; private final Schema readerSchema; + private InternalSchema internalSchema = InternalSchema.getEmptyInternalSchema(); private final boolean readBlocksLazily; private final boolean reverseLogReader; private final String recordKeyField; @@ -51,8 +53,8 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { private static final Logger LOG = LogManager.getLogger(HoodieLogFormatReader.class); HoodieLogFormatReader(FileSystem fs, List logFiles, Schema readerSchema, boolean readBlocksLazily, - boolean reverseLogReader, int bufferSize, boolean enableInlineReading, - String recordKeyField) throws IOException { + boolean reverseLogReader, int bufferSize, boolean enableRecordLookups, + String recordKeyField, InternalSchema internalSchema) throws IOException { this.logFiles = logFiles; this.fs = fs; this.readerSchema = readerSchema; @@ -61,11 +63,12 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { this.bufferSize = bufferSize; this.prevReadersInOpenState = new ArrayList<>(); this.recordKeyField = recordKeyField; - this.enableInlineReading = enableInlineReading; + this.enableInlineReading = enableRecordLookups; + this.internalSchema = internalSchema == null ? 
InternalSchema.getEmptyInternalSchema() : internalSchema; if (logFiles.size() > 0) { HoodieLogFile nextLogFile = logFiles.remove(0); this.currentReader = new HoodieLogFileReader(fs, nextLogFile, readerSchema, bufferSize, readBlocksLazily, false, - enableInlineReading, recordKeyField); + enableRecordLookups, recordKeyField, internalSchema); } } @@ -105,7 +108,7 @@ public boolean hasNext() { this.prevReadersInOpenState.add(currentReader); } this.currentReader = new HoodieLogFileReader(fs, nextLogFile, readerSchema, bufferSize, readBlocksLazily, false, - enableInlineReading, recordKeyField); + enableInlineReading, recordKeyField, internalSchema); } catch (IOException io) { throw new HoodieIOException("unable to initialize read with log file ", io); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java index 882e1057c8043..ed18736443288 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.table.log; import org.apache.hudi.common.config.HoodieCommonConfig; +import org.apache.hudi.common.model.DeleteRecord; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieOperation; @@ -28,12 +29,14 @@ import org.apache.hudi.common.util.HoodieRecordSizeEstimator; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.SpillableMapUtils; import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.exception.HoodieIOException; import org.apache.avro.Schema; import org.apache.hadoop.fs.FileSystem; +import 
org.apache.hudi.internal.schema.InternalSchema; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -42,6 +45,8 @@ import java.util.List; import java.util.Map; +import static org.apache.hudi.common.util.ValidationUtils.checkState; + /** * Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged list of records which will * be used as a lookup table when merging the base columnar file with the redo log file. @@ -73,14 +78,14 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordReader protected HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List logFilePaths, Schema readerSchema, String latestInstantTime, Long maxMemorySizeInBytes, boolean readBlocksLazily, boolean reverseReader, int bufferSize, String spillableMapBasePath, - Option instantRange, boolean autoScan, + Option instantRange, ExternalSpillableMap.DiskMapType diskMapType, boolean isBitCaskDiskMapCompressionEnabled, - boolean withOperationField, boolean enableFullScan, - Option partitionName) { + boolean withOperationField, boolean forceFullScan, + Option partitionName, InternalSchema internalSchema) { super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize, instantRange, withOperationField, - enableFullScan, partitionName); + forceFullScan, partitionName, internalSchema); try { // Store merged records for all versions for this log file, set the in-memory footprint to maxInMemoryMapSize this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, spillableMapBasePath, new DefaultSizeEstimator(), @@ -90,7 +95,7 @@ protected HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List> iterator() { + checkState(forceFullScan, "Record reader has to be in full-scan mode to use this API"); return records.iterator(); } public Map> getRecords() { + checkState(forceFullScan, "Record reader has to be in full-scan mode to use this API"); return records; } @@ -135,7 
+142,7 @@ protected void processNextRecord(HoodieRecord hoo String key = hoodieRecord.getRecordKey(); if (records.containsKey(key)) { // Merge and store the merged record. The HoodieRecordPayload implementation is free to decide what should be - // done when a delete (empty payload) is encountered before or after an insert/update. + // done when a DELETE (empty payload) is encountered before or after an insert/update. HoodieRecord oldRecord = records.get(key); HoodieRecordPayload oldValue = oldRecord.getData(); @@ -152,9 +159,29 @@ protected void processNextRecord(HoodieRecord hoo } @Override - protected void processNextDeletedKey(HoodieKey hoodieKey) { - records.put(hoodieKey.getRecordKey(), SpillableMapUtils.generateEmptyPayload(hoodieKey.getRecordKey(), - hoodieKey.getPartitionPath(), getPayloadClassFQN())); + protected void processNextDeletedRecord(DeleteRecord deleteRecord) { + String key = deleteRecord.getRecordKey(); + HoodieRecord oldRecord = records.get(key); + if (oldRecord != null) { + // Merge and store the merged record. The ordering val is taken to decide whether the same key record + // should be deleted or be kept. The old record is kept only if the DELETE record has smaller ordering val. + // For same ordering values, uses the natural order(arrival time semantics). + + Comparable curOrderingVal = oldRecord.getData().getOrderingValue(); + Comparable deleteOrderingVal = deleteRecord.getOrderingValue(); + // Checks the ordering value does not equal to 0 + // because we use 0 as the default value which means natural order + boolean choosePrev = !deleteOrderingVal.equals(0) + && ReflectionUtils.isSameClass(curOrderingVal, deleteOrderingVal) + && curOrderingVal.compareTo(deleteOrderingVal) > 0; + if (choosePrev) { + // The DELETE message is obsolete if the old message has greater orderingVal. 
+ return; + } + } + // Put the DELETE record + records.put(key, SpillableMapUtils.generateEmptyPayload(key, + deleteRecord.getPartitionPath(), deleteRecord.getOrderingValue(), getPayloadClassFQN())); } public long getTotalTimeTakenToReadAndMergeBlocks() { @@ -175,6 +202,7 @@ public static class Builder extends AbstractHoodieLogRecordReader.Builder { protected String basePath; protected List logFilePaths; protected Schema readerSchema; + private InternalSchema internalSchema = InternalSchema.getEmptyInternalSchema(); protected String latestInstantTime; protected boolean readBlocksLazily; protected boolean reverseReader; @@ -187,8 +215,6 @@ public static class Builder extends AbstractHoodieLogRecordReader.Builder { // incremental filtering protected Option instantRange = Option.empty(); protected String partitionName; - // auto scan default true - private boolean autoScan = true; // operation field default false private boolean withOperationField = false; @@ -266,8 +292,8 @@ public Builder withBitCaskDiskMapCompressionEnabled(boolean isBitCaskDiskMapComp return this; } - public Builder withAutoScan(boolean autoScan) { - this.autoScan = autoScan; + public Builder withInternalSchema(InternalSchema internalSchema) { + this.internalSchema = internalSchema == null ? 
InternalSchema.getEmptyInternalSchema() : internalSchema; return this; } @@ -286,9 +312,9 @@ public Builder withPartition(String partitionName) { public HoodieMergedLogRecordScanner build() { return new HoodieMergedLogRecordScanner(fs, basePath, logFilePaths, readerSchema, latestInstantTime, maxMemorySizeInBytes, readBlocksLazily, reverseReader, - bufferSize, spillableMapBasePath, instantRange, autoScan, + bufferSize, spillableMapBasePath, instantRange, diskMapType, isBitCaskDiskMapCompressionEnabled, withOperationField, true, - Option.ofNullable(partitionName)); + Option.ofNullable(partitionName), internalSchema); } } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieUnMergedLogRecordScanner.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieUnMergedLogRecordScanner.java index f781a148a3938..8ea34d6f2fa0d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieUnMergedLogRecordScanner.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieUnMergedLogRecordScanner.java @@ -18,7 +18,7 @@ package org.apache.hudi.common.table.log; -import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.DeleteRecord; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.Option; @@ -56,7 +56,7 @@ protected void processNextRecord(HoodieRecord hoo } @Override - protected void processNextDeletedKey(HoodieKey key) { + protected void processNextDeletedRecord(DeleteRecord deleteRecord) { throw new IllegalStateException("Not expected to see delete records in this log-scan mode. Check Job Config"); } @@ -64,9 +64,9 @@ protected void processNextDeletedKey(HoodieKey key) { * A callback for log record scanner. 
*/ @FunctionalInterface - public static interface LogRecordScannerCallback { + public interface LogRecordScannerCallback { - public void apply(HoodieRecord record) throws Exception; + void apply(HoodieRecord record) throws Exception; } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java index e7f183fafcdd4..491c6700c9067 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java @@ -34,6 +34,7 @@ import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.internal.schema.InternalSchema; import javax.annotation.Nonnull; import java.io.ByteArrayInputStream; @@ -62,6 +63,17 @@ public class HoodieAvroDataBlock extends HoodieDataBlock { private final ThreadLocal encoderCache = new ThreadLocal<>(); + public HoodieAvroDataBlock(FSDataInputStream inputStream, + Option content, + boolean readBlockLazily, + HoodieLogBlockContentLocation logBlockContentLocation, + Option readerSchema, + Map header, + Map footer, + String keyField, InternalSchema internalSchema) { + super(content, inputStream, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, header, footer, keyField, false, internalSchema); + } + public HoodieAvroDataBlock(FSDataInputStream inputStream, Option content, boolean readBlockLazily, @@ -126,7 +138,7 @@ protected byte[] serializeRecords(List records) throws IOExceptio @Override protected ClosableIterator deserializeRecords(byte[] content) throws IOException { checkState(this.readerSchema != null, "Reader's schema has to be non-null"); - return RecordIterator.getInstance(this, content); + return RecordIterator.getInstance(this, content, internalSchema); } 
private static class RecordIterator implements ClosableIterator { @@ -138,7 +150,7 @@ private static class RecordIterator implements ClosableIterator { private int totalRecords = 0; private int readRecords = 0; - private RecordIterator(Schema readerSchema, Schema writerSchema, byte[] content) throws IOException { + private RecordIterator(Schema readerSchema, Schema writerSchema, byte[] content, InternalSchema internalSchema) throws IOException { this.content = content; this.dis = new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(this.content))); @@ -147,17 +159,26 @@ private RecordIterator(Schema readerSchema, Schema writerSchema, byte[] content) int version = this.dis.readInt(); HoodieAvroDataBlockVersion logBlockVersion = new HoodieAvroDataBlockVersion(version); - this.reader = new GenericDatumReader<>(writerSchema, readerSchema); + Schema finalReadSchema = readerSchema; + if (!internalSchema.isEmptySchema()) { + // we should use write schema to read log file, + // since when we have done some DDL operation, the readerSchema maybe different from writeSchema, avro reader will throw exception. + // eg: origin writeSchema is: "a String, b double" then we add a new column now the readerSchema will be: "a string, c int, b double". it's wrong to use readerSchema to read old log file. 
+ // after we read those record by writeSchema, we rewrite those record with readerSchema in AbstractHoodieLogRecordReader + finalReadSchema = writerSchema; + } + + this.reader = new GenericDatumReader<>(writerSchema, finalReadSchema); if (logBlockVersion.hasRecordCount()) { this.totalRecords = this.dis.readInt(); } } - public static RecordIterator getInstance(HoodieAvroDataBlock dataBlock, byte[] content) throws IOException { + public static RecordIterator getInstance(HoodieAvroDataBlock dataBlock, byte[] content, InternalSchema internalSchema) throws IOException { // Get schema from the header Schema writerSchema = new Schema.Parser().parse(dataBlock.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); - return new RecordIterator(dataBlock.readerSchema, writerSchema, content); + return new RecordIterator(dataBlock.readerSchema, writerSchema, content, internalSchema); } @Override @@ -209,12 +230,16 @@ public HoodieAvroDataBlock(List records, Schema schema) { super(records, Collections.singletonMap(HeaderMetadataType.SCHEMA, schema.toString()), new HashMap<>(), HoodieRecord.RECORD_KEY_METADATA_FIELD); } + public static HoodieAvroDataBlock getBlock(byte[] content, Schema readerSchema) throws IOException { + return getBlock(content, readerSchema, InternalSchema.getEmptyInternalSchema()); + } + /** * This method is retained to provide backwards compatibility to HoodieArchivedLogs which were written using * HoodieLogFormat V1. 
*/ @Deprecated - public static HoodieAvroDataBlock getBlock(byte[] content, Schema readerSchema) throws IOException { + public static HoodieAvroDataBlock getBlock(byte[] content, Schema readerSchema, InternalSchema internalSchema) throws IOException { SizeAwareDataInputStream dis = new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(content))); @@ -228,6 +253,10 @@ public static HoodieAvroDataBlock getBlock(byte[] content, Schema readerSchema) readerSchema = writerSchema; } + if (!internalSchema.isEmptySchema()) { + readerSchema = writerSchema; + } + GenericDatumReader reader = new GenericDatumReader<>(writerSchema, readerSchema); // 2. Get the total records int totalRecords = dis.readInt(); @@ -285,7 +314,7 @@ public byte[] getBytes(Schema schema) throws IOException { output.write(schemaContent); List records = new ArrayList<>(); - try (ClosableIterator recordItr = getRecordItr()) { + try (ClosableIterator recordItr = getRecordIterator()) { recordItr.forEachRemaining(records::add); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java index 846b8d36a5091..c83b3bc82d56c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java @@ -25,6 +25,7 @@ import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hudi.internal.schema.InternalSchema; import java.io.IOException; import java.util.HashSet; @@ -60,6 +61,8 @@ public abstract class HoodieDataBlock extends HoodieLogBlock { protected final Schema readerSchema; + protected InternalSchema internalSchema = InternalSchema.getEmptyInternalSchema(); + /** * NOTE: This ctor is used on the write-path (ie when records ought to be written into the log) */ @@ 
-95,6 +98,25 @@ protected HoodieDataBlock(Option content, this.enablePointLookups = enablePointLookups; } + protected HoodieDataBlock(Option content, + FSDataInputStream inputStream, + boolean readBlockLazily, + Option blockContentLocation, + Option readerSchema, + Map headers, + Map footer, + String keyFieldName, + boolean enablePointLookups, + InternalSchema internalSchema) { + super(headers, footer, blockContentLocation, content, inputStream, readBlockLazily); + this.records = Option.empty(); + this.keyFieldName = keyFieldName; + // If no reader-schema has been provided assume writer-schema as one + this.readerSchema = readerSchema.orElseGet(() -> getWriterSchema(super.getLogBlockHeader())); + this.enablePointLookups = enablePointLookups; + this.internalSchema = internalSchema == null ? InternalSchema.getEmptyInternalSchema() : internalSchema; + } + @Override public byte[] getContentBytes() throws IOException { // In case this method is called before realizing records from content @@ -116,7 +138,7 @@ protected static Schema getWriterSchema(Map logBlock /** * Returns all the records iterator contained w/in this block. */ - public final ClosableIterator getRecordItr() { + public final ClosableIterator getRecordIterator() { if (records.isPresent()) { return list2Iterator(records.get()); } @@ -140,21 +162,21 @@ public Schema getSchema() { * @return List of IndexedRecords for the keys of interest. 
* @throws IOException in case of failures encountered when reading/parsing records */ - public final ClosableIterator getRecordItr(List keys) throws IOException { + public final ClosableIterator getRecordIterator(List keys, boolean fullKey) throws IOException { boolean fullScan = keys.isEmpty(); if (enablePointLookups && !fullScan) { - return lookupRecords(keys); + return lookupRecords(keys, fullKey); } // Otherwise, we fetch all the records and filter out all the records, but the // ones requested - ClosableIterator allRecords = getRecordItr(); + ClosableIterator allRecords = getRecordIterator(); if (fullScan) { return allRecords; } HashSet keySet = new HashSet<>(keys); - return FilteringIterator.getInstance(allRecords, keySet, this::getRecordKey); + return FilteringIterator.getInstance(allRecords, keySet, fullKey, this::getRecordKey); } protected ClosableIterator readRecordsFromBlockPayload() throws IOException { @@ -171,7 +193,7 @@ protected ClosableIterator readRecordsFromBlockPayload() throws I } } - protected ClosableIterator lookupRecords(List keys) throws IOException { + protected ClosableIterator lookupRecords(List keys, boolean fullKey) throws IOException { throw new UnsupportedOperationException( String.format("Point lookups are not supported by this Data block type (%s)", getBlockType()) ); @@ -230,21 +252,25 @@ private static class FilteringIterator implements Closa private final ClosableIterator nested; // nested iterator private final Set keys; // the filtering keys + private final boolean fullKey; + private final Function> keyExtract; // function to extract the key private T next; - private FilteringIterator(ClosableIterator nested, Set keys, Function> keyExtract) { + private FilteringIterator(ClosableIterator nested, Set keys, boolean fullKey, Function> keyExtract) { this.nested = nested; this.keys = keys; + this.fullKey = fullKey; this.keyExtract = keyExtract; } public static FilteringIterator getInstance( ClosableIterator nested, Set keys, + 
boolean fullKey, Function> keyExtract) { - return new FilteringIterator<>(nested, keys, keyExtract); + return new FilteringIterator<>(nested, keys, fullKey, keyExtract); } @Override @@ -256,7 +282,13 @@ public void close() { public boolean hasNext() { while (this.nested.hasNext()) { this.next = this.nested.next(); - if (keys.contains(keyExtract.apply(this.next).orElse(null))) { + String key = keyExtract.apply(this.next) + .orElseGet(() -> { + throw new IllegalStateException(String.format("Record without a key (%s)", this.next)); + }); + + if (fullKey && keys.contains(key) + || !fullKey && keys.stream().anyMatch(key::startsWith)) { return true; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java index 01159ab72dffe..a5168072d014d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.table.log.block; import org.apache.hudi.common.fs.SizeAwareDataInputStream; +import org.apache.hudi.common.model.DeleteRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.SerializationUtils; @@ -31,6 +32,7 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -39,11 +41,11 @@ */ public class HoodieDeleteBlock extends HoodieLogBlock { - private HoodieKey[] keysToDelete; + private DeleteRecord[] recordsToDelete; - public HoodieDeleteBlock(HoodieKey[] keysToDelete, Map header) { + public HoodieDeleteBlock(DeleteRecord[] recordsToDelete, Map header) { this(Option.empty(), null, false, Option.empty(), header, new HashMap<>()); - this.keysToDelete = keysToDelete; + 
this.recordsToDelete = recordsToDelete; } public HoodieDeleteBlock(Option content, FSDataInputStream inputStream, boolean readBlockLazily, @@ -59,23 +61,23 @@ public byte[] getContentBytes() throws IOException { // In case this method is called before realizing keys from content if (content.isPresent()) { return content.get(); - } else if (readBlockLazily && keysToDelete == null) { + } else if (readBlockLazily && recordsToDelete == null) { // read block lazily - getKeysToDelete(); + getRecordsToDelete(); } ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream output = new DataOutputStream(baos); - byte[] bytesToWrite = SerializationUtils.serialize(getKeysToDelete()); + byte[] bytesToWrite = SerializationUtils.serialize(getRecordsToDelete()); output.writeInt(version); output.writeInt(bytesToWrite.length); output.write(bytesToWrite); return baos.toByteArray(); } - public HoodieKey[] getKeysToDelete() { + public DeleteRecord[] getRecordsToDelete() { try { - if (keysToDelete == null) { + if (recordsToDelete == null) { if (!getContent().isPresent() && readBlockLazily) { // read content from disk inflate(); @@ -86,15 +88,25 @@ public HoodieKey[] getKeysToDelete() { int dataLength = dis.readInt(); byte[] data = new byte[dataLength]; dis.readFully(data); - this.keysToDelete = SerializationUtils.deserialize(data); + this.recordsToDelete = deserialize(version, data); deflate(); } - return keysToDelete; + return recordsToDelete; } catch (IOException io) { throw new HoodieIOException("Unable to generate keys to delete from block content", io); } } + private static DeleteRecord[] deserialize(int version, byte[] data) { + if (version == 1) { + // legacy version + HoodieKey[] keys = SerializationUtils.deserialize(data); + return Arrays.stream(keys).map(DeleteRecord::create).toArray(DeleteRecord[]::new); + } else { + return SerializationUtils.deserialize(data); + } + } + @Override public HoodieLogBlockType getBlockType() { return 
HoodieLogBlockType.DELETE_BLOCK; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index 557a0db7cbfad..72cb3a0ef3b47 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -18,6 +18,19 @@ package org.apache.hudi.common.table.log.block; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.fs.inline.InLineFSUtils; +import org.apache.hudi.common.fs.inline.InLineFileSystem; +import org.apache.hudi.common.util.ClosableIterator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.storage.HoodieHBaseKVComparator; +import org.apache.hudi.io.storage.HoodieHFileReader; + import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; @@ -30,22 +43,12 @@ import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; -import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.common.fs.inline.InLineFSUtils; -import org.apache.hudi.common.fs.inline.InLineFileSystem; -import org.apache.hudi.common.util.ClosableIterator; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.StringUtils; -import org.apache.hudi.common.util.ValidationUtils; -import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.io.storage.HoodieHBaseKVComparator; -import org.apache.hudi.io.storage.HoodieHFileReader; - import 
org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -65,6 +68,9 @@ public class HoodieHFileDataBlock extends HoodieDataBlock { private static final int DEFAULT_BLOCK_SIZE = 1024 * 1024; private final Option compressionAlgorithm; + // This path is used for constructing HFile reader context, which should not be + // interpreted as the actual file path for the HFile data blocks + private final Path pathForReader; public HoodieHFileDataBlock(FSDataInputStream inputStream, Option content, @@ -73,16 +79,20 @@ public HoodieHFileDataBlock(FSDataInputStream inputStream, Option readerSchema, Map header, Map footer, - boolean enablePointLookups) { + boolean enablePointLookups, + Path pathForReader) { super(content, inputStream, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, header, footer, HoodieHFileReader.KEY_FIELD_NAME, enablePointLookups); this.compressionAlgorithm = Option.empty(); + this.pathForReader = pathForReader; } public HoodieHFileDataBlock(List records, Map header, - Compression.Algorithm compressionAlgorithm) { + Compression.Algorithm compressionAlgorithm, + Path pathForReader) { super(records, header, new HashMap<>(), HoodieHFileReader.KEY_FIELD_NAME); this.compressionAlgorithm = Option.of(compressionAlgorithm); + this.pathForReader = pathForReader; } @Override @@ -95,6 +105,7 @@ protected byte[] serializeRecords(List records) throws IOExceptio HFileContext context = new HFileContextBuilder() .withBlockSize(DEFAULT_BLOCK_SIZE) .withCompression(compressionAlgorithm.get()) + .withCellComparator(new HoodieHBaseKVComparator()) .build(); Configuration conf = new Configuration(); @@ -128,7 +139,7 @@ protected byte[] serializeRecords(List records) throws IOExceptio } HFile.Writer writer = HFile.getWriterFactory(conf, cacheConfig) - 
.withOutputStream(ostream).withFileContext(context).withComparator(new HoodieHBaseKVComparator()).create(); + .withOutputStream(ostream).withFileContext(context).create(); // Write the records sortedRecordsMap.forEach((recordKey, recordBytes) -> { @@ -140,6 +151,8 @@ protected byte[] serializeRecords(List records) throws IOExceptio } }); + writer.appendFileInfo(HoodieHFileReader.SCHEMA_KEY.getBytes(), getSchema().toString().getBytes()); + writer.close(); ostream.flush(); ostream.close(); @@ -154,10 +167,9 @@ protected ClosableIterator deserializeRecords(byte[] content) thr // Get schema from the header Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); + FileSystem fs = FSUtils.getFs(pathForReader.toString(), new Configuration()); // Read the content - HoodieHFileReader reader = new HoodieHFileReader<>(content); - // Sets up the writer schema - reader.withSchema(writerSchema); + HoodieHFileReader reader = new HoodieHFileReader<>(fs, pathForReader, content, Option.of(writerSchema)); Iterator recordIterator = reader.getRecordIterator(readerSchema); return new ClosableIterator() { @Override @@ -179,13 +191,14 @@ public IndexedRecord next() { // TODO abstract this w/in HoodieDataBlock @Override - protected ClosableIterator lookupRecords(List keys) throws IOException { + protected ClosableIterator lookupRecords(List keys, boolean fullKey) throws IOException { HoodieLogBlockContentLocation blockContentLoc = getBlockContentLocation().get(); // NOTE: It's important to extend Hadoop configuration here to make sure configuration // is appropriately carried over Configuration inlineConf = new Configuration(blockContentLoc.getHadoopConf()); inlineConf.set("fs." 
+ InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName()); + inlineConf.setClassLoader(Thread.currentThread().getContextClassLoader()); Path inlinePath = InLineFSUtils.getInlineFilePath( blockContentLoc.getLogFile().getPath(), @@ -193,13 +206,18 @@ protected ClosableIterator lookupRecords(List keys) throw blockContentLoc.getContentPositionInLogFile(), blockContentLoc.getBlockSize()); - // HFile read will be efficient if keys are sorted, since on storage, records are sorted by key. This will avoid unnecessary seeks. - Collections.sort(keys); + // HFile read will be efficient if keys are sorted, since on storage records are sorted by key. + // This will avoid unnecessary seeks. + List sortedKeys = new ArrayList<>(keys); + Collections.sort(sortedKeys); final HoodieHFileReader reader = new HoodieHFileReader<>(inlineConf, inlinePath, new CacheConfig(inlineConf), inlinePath.getFileSystem(inlineConf)); + // Get writer's schema from the header - final ClosableIterator recordIterator = reader.getRecordIterator(keys, readerSchema); + final ClosableIterator recordIterator = + fullKey ? 
reader.getRecordsByKeysIterator(sortedKeys, readerSchema) : reader.getRecordsByKeyPrefixIterator(sortedKeys, readerSchema); + return new ClosableIterator() { @Override public boolean hasNext() { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java index d514f28ce1c4a..71336be883781 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java @@ -51,7 +51,7 @@ public abstract class HoodieLogBlock { * corresponding changes need to be made to {@link HoodieLogBlockVersion} TODO : Change this to a class, something * like HoodieLogBlockVersionV1/V2 and implement/override operations there */ - public static int version = 1; + public static int version = 2; // Header for each log block private final Map logBlockHeader; // Footer for each log block diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java index 36dd5368d4a63..d912525fe9271 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java @@ -73,7 +73,9 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline { INFLIGHT_COMPACTION_EXTENSION, REQUESTED_COMPACTION_EXTENSION, REQUESTED_RESTORE_EXTENSION, INFLIGHT_RESTORE_EXTENSION, RESTORE_EXTENSION, ROLLBACK_EXTENSION, REQUESTED_ROLLBACK_EXTENSION, INFLIGHT_ROLLBACK_EXTENSION, - REQUESTED_REPLACE_COMMIT_EXTENSION, INFLIGHT_REPLACE_COMMIT_EXTENSION, REPLACE_COMMIT_EXTENSION)); + REQUESTED_REPLACE_COMMIT_EXTENSION, INFLIGHT_REPLACE_COMMIT_EXTENSION, REPLACE_COMMIT_EXTENSION, + REQUESTED_INDEX_COMMIT_EXTENSION, 
INFLIGHT_INDEX_COMMIT_EXTENSION, INDEX_COMMIT_EXTENSION, + REQUESTED_SAVE_SCHEMA_ACTION_EXTENSION, INFLIGHT_SAVE_SCHEMA_ACTION_EXTENSION, SAVE_SCHEMA_ACTION_EXTENSION)); private static final Logger LOG = LogManager.getLogger(HoodieActiveTimeline.class); protected HoodieTableMetaClient metaClient; @@ -99,7 +101,6 @@ public static String createNewInstantTime() { return HoodieInstantTimeGenerator.createNewInstantTime(0); } - /** * Returns next instant time that adds N milliseconds to current time. * Ensures each instant time is atleast 1 second apart since we create instant times at second granularity @@ -201,6 +202,11 @@ public void deletePending(HoodieInstant instant) { deleteInstantFile(instant); } + public void deleteCompletedRollback(HoodieInstant instant) { + ValidationUtils.checkArgument(instant.isCompleted()); + deleteInstantFile(instant); + } + public static void deleteInstantFile(FileSystem fs, String metaPath, HoodieInstant instant) { try { fs.delete(new Path(metaPath, instant.getFileName()), false); @@ -220,9 +226,9 @@ public void deleteCompactionRequested(HoodieInstant instant) { deleteInstantFile(instant); } - private void deleteInstantFileIfExists(HoodieInstant instant) { + public void deleteInstantFileIfExists(HoodieInstant instant) { LOG.info("Deleting instant " + instant); - Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName()); + Path inFlightCommitFilePath = getInstantFileNamePath(instant.getFileName()); try { if (metaClient.getFs().exists(inFlightCommitFilePath)) { boolean result = metaClient.getFs().delete(inFlightCommitFilePath, false); @@ -241,7 +247,7 @@ private void deleteInstantFileIfExists(HoodieInstant instant) { private void deleteInstantFile(HoodieInstant instant) { LOG.info("Deleting instant " + instant); - Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName()); + Path inFlightCommitFilePath = getInstantFileNamePath(instant.getFileName()); try { boolean result = 
metaClient.getFs().delete(inFlightCommitFilePath, false); if (result) { @@ -256,7 +262,7 @@ private void deleteInstantFile(HoodieInstant instant) { @Override public Option getInstantDetails(HoodieInstant instant) { - Path detailPath = new Path(metaClient.getMetaPath(), instant.getFileName()); + Path detailPath = getInstantFileNamePath(instant.getFileName()); return readDataFromPath(detailPath); } @@ -302,12 +308,12 @@ public Option> getLastCommitMetadataWi public Option readCleanerInfoAsBytes(HoodieInstant instant) { // Cleaner metadata are always stored only in timeline .hoodie - return readDataFromPath(new Path(metaClient.getMetaPath(), instant.getFileName())); + return readDataFromPath(getInstantFileNamePath(instant.getFileName())); } public Option readRollbackInfoAsBytes(HoodieInstant instant) { // Rollback metadata are always stored only in timeline .hoodie - return readDataFromPath(new Path(metaClient.getMetaPath(), instant.getFileName())); + return readDataFromPath(getInstantFileNamePath(instant.getFileName())); } public Option readRestoreInfoAsBytes(HoodieInstant instant) { @@ -334,6 +340,10 @@ public Option readCompactionPlanAsBytes(HoodieInstant instant) { } } + public Option readIndexPlanAsBytes(HoodieInstant instant) { + return readDataFromPath(new Path(metaClient.getMetaPath(), instant.getFileName())); + } + /** * Revert compaction State from inflight to requested. 
* @@ -533,24 +543,23 @@ private void transitionState(HoodieInstant fromInstant, HoodieInstant toInstant, if (metaClient.getTimelineLayoutVersion().isNullVersion()) { // Re-create the .inflight file by opening a new file and write the commit metadata in createFileInMetaPath(fromInstant.getFileName(), data, allowRedundantTransitions); - Path fromInstantPath = new Path(metaClient.getMetaPath(), fromInstant.getFileName()); - Path toInstantPath = new Path(metaClient.getMetaPath(), toInstant.getFileName()); + Path fromInstantPath = getInstantFileNamePath(fromInstant.getFileName()); + Path toInstantPath = getInstantFileNamePath(toInstant.getFileName()); boolean success = metaClient.getFs().rename(fromInstantPath, toInstantPath); if (!success) { throw new HoodieIOException("Could not rename " + fromInstantPath + " to " + toInstantPath); } } else { // Ensures old state exists in timeline - LOG.info("Checking for file exists ?" + new Path(metaClient.getMetaPath(), fromInstant.getFileName())); - ValidationUtils.checkArgument(metaClient.getFs().exists(new Path(metaClient.getMetaPath(), - fromInstant.getFileName()))); + LOG.info("Checking for file exists ?" + getInstantFileNamePath(fromInstant.getFileName())); + ValidationUtils.checkArgument(metaClient.getFs().exists(getInstantFileNamePath(fromInstant.getFileName()))); // Use Write Once to create Target File if (allowRedundantTransitions) { - FileIOUtils.createFileInPath(metaClient.getFs(), new Path(metaClient.getMetaPath(), toInstant.getFileName()), data); + FileIOUtils.createFileInPath(metaClient.getFs(), getInstantFileNamePath(toInstant.getFileName()), data); } else { - createImmutableFileInPath(new Path(metaClient.getMetaPath(), toInstant.getFileName()), data); + createImmutableFileInPath(getInstantFileNamePath(toInstant.getFileName()), data); } - LOG.info("Create new file for toInstant ?" + new Path(metaClient.getMetaPath(), toInstant.getFileName())); + LOG.info("Create new file for toInstant ?" 
+ getInstantFileNamePath(toInstant.getFileName())); } } catch (IOException e) { throw new HoodieIOException("Could not complete " + fromInstant, e); @@ -559,8 +568,8 @@ private void transitionState(HoodieInstant fromInstant, HoodieInstant toInstant, private void revertCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) { ValidationUtils.checkArgument(completed.getTimestamp().equals(inflight.getTimestamp())); - Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), inflight.getFileName()); - Path commitFilePath = new Path(metaClient.getMetaPath(), completed.getFileName()); + Path inFlightCommitFilePath = getInstantFileNamePath(inflight.getFileName()); + Path commitFilePath = getInstantFileNamePath(completed.getFileName()); try { if (metaClient.getTimelineLayoutVersion().isNullVersion()) { if (!metaClient.getFs().exists(inFlightCommitFilePath)) { @@ -571,8 +580,8 @@ private void revertCompleteToInflight(HoodieInstant completed, HoodieInstant inf } } } else { - Path requestedInstantFilePath = new Path(metaClient.getMetaPath(), - new HoodieInstant(State.REQUESTED, inflight.getAction(), inflight.getTimestamp()).getFileName()); + Path requestedInstantFilePath = getInstantFileNamePath(new HoodieInstant(State.REQUESTED, + inflight.getAction(), inflight.getTimestamp()).getFileName()); // If inflight and requested files do not exist, create one if (!metaClient.getFs().exists(requestedInstantFilePath)) { @@ -591,6 +600,10 @@ private void revertCompleteToInflight(HoodieInstant completed, HoodieInstant inf } } + private Path getInstantFileNamePath(String fileName) { + return new Path(fileName.contains(SCHEMA_COMMIT_ACTION) ? 
metaClient.getSchemaFolderName() : metaClient.getMetaPath(), fileName); + } + public void transitionRequestedToInflight(String commitType, String inFlightInstant) { HoodieInstant requested = new HoodieInstant(HoodieInstant.State.REQUESTED, commitType, inFlightInstant); transitionRequestedToInflight(requested, Option.empty(), false); @@ -647,8 +660,67 @@ public void saveToRestoreRequested(HoodieInstant instant, Option content createFileInMetaPath(instant.getFileName(), content, false); } + /** + * Transition index instant state from requested to inflight. + * + * @param requestedInstant Inflight Instant + * @return inflight instant + */ + public HoodieInstant transitionIndexRequestedToInflight(HoodieInstant requestedInstant, Option data) { + ValidationUtils.checkArgument(requestedInstant.getAction().equals(HoodieTimeline.INDEXING_ACTION), + String.format("%s is not equal to %s action", requestedInstant.getAction(), INDEXING_ACTION)); + ValidationUtils.checkArgument(requestedInstant.isRequested(), + String.format("Instant %s not in requested state", requestedInstant.getTimestamp())); + HoodieInstant inflightInstant = new HoodieInstant(State.INFLIGHT, INDEXING_ACTION, requestedInstant.getTimestamp()); + transitionState(requestedInstant, inflightInstant, data); + return inflightInstant; + } + + /** + * Transition index instant state from inflight to completed. 
+ * @param inflightInstant Inflight Instant + * @return completed instant + */ + public HoodieInstant transitionIndexInflightToComplete(HoodieInstant inflightInstant, Option data) { + ValidationUtils.checkArgument(inflightInstant.getAction().equals(HoodieTimeline.INDEXING_ACTION), + String.format("%s is not equal to %s action", inflightInstant.getAction(), INDEXING_ACTION)); + ValidationUtils.checkArgument(inflightInstant.isInflight(), + String.format("Instant %s not inflight", inflightInstant.getTimestamp())); + HoodieInstant commitInstant = new HoodieInstant(State.COMPLETED, INDEXING_ACTION, inflightInstant.getTimestamp()); + transitionState(inflightInstant, commitInstant, data); + return commitInstant; + } + + /** + * Revert index instant state from inflight to requested. + * @param inflightInstant Inflight Instant + * @return requested instant + */ + public HoodieInstant revertIndexInflightToRequested(HoodieInstant inflightInstant) { + ValidationUtils.checkArgument(inflightInstant.getAction().equals(HoodieTimeline.INDEXING_ACTION), + String.format("%s is not equal to %s action", inflightInstant.getAction(), INDEXING_ACTION)); + ValidationUtils.checkArgument(inflightInstant.isInflight(), + String.format("Instant %s not inflight", inflightInstant.getTimestamp())); + HoodieInstant requestedInstant = new HoodieInstant(State.REQUESTED, INDEXING_ACTION, inflightInstant.getTimestamp()); + if (metaClient.getTimelineLayoutVersion().isNullVersion()) { + transitionState(inflightInstant, requestedInstant, Option.empty()); + } else { + deleteInflight(inflightInstant); + } + return requestedInstant; + } + + /** + * Save content for inflight/requested index instant. 
+ */ + public void saveToPendingIndexAction(HoodieInstant instant, Option content) { + ValidationUtils.checkArgument(instant.getAction().equals(HoodieTimeline.INDEXING_ACTION), + String.format("%s is not equal to %s action", instant.getAction(), INDEXING_ACTION)); + createFileInMetaPath(instant.getFileName(), content, false); + } + private void createFileInMetaPath(String filename, Option content, boolean allowOverwrite) { - Path fullPath = new Path(metaClient.getMetaPath(), filename); + Path fullPath = getInstantFileNamePath(filename); if (allowOverwrite || metaClient.getTimelineLayoutVersion().isNullVersion()) { FileIOUtils.createFileInPath(metaClient.getFs(), fullPath, content); } else { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java index ddfe22ac9e02e..a9b25844ec7ef 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java @@ -27,6 +27,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; +import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.FileIOUtils; @@ -117,7 +118,8 @@ record -> HoodieInstant.State.COMPLETED.toString().equals(record.get(ACTION_STAT * * @deprecated */ - public HoodieArchivedTimeline() {} + public HoodieArchivedTimeline() { + } /** * This method is only used when this object is deserialized in a spark executor. 
@@ -206,6 +208,8 @@ private Option getMetadataKey(String action) { return Option.of("hoodieCompactionPlan"); case HoodieTimeline.REPLACE_COMMIT_ACTION: return Option.of("hoodieReplaceCommitMetadata"); + case HoodieTimeline.INDEXING_ACTION: + return Option.of("hoodieIndexCommitMetadata"); default: LOG.error(String.format("Unknown action in metadata (%s)", action)); return Option.empty(); @@ -248,16 +252,19 @@ private List loadInstants(TimeRangeFilter filter, boolean loadIns int instantsInPreviousFile = instantsInRange.size(); // Read the avro blocks while (reader.hasNext()) { - HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - // TODO If we can store additional metadata in datablock, we can skip parsing records - // (such as startTime, endTime of records in the block) - try (ClosableIterator itr = blk.getRecordItr()) { - StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true) - // Filter blocks in desired time window - .filter(r -> commitsFilter.apply((GenericRecord) r)) - .map(r -> readCommit((GenericRecord) r, loadInstantDetails)) - .filter(c -> filter == null || filter.isInRange(c)) - .forEach(instantsInRange::add); + HoodieLogBlock block = reader.next(); + if (block instanceof HoodieAvroDataBlock) { + HoodieAvroDataBlock avroBlock = (HoodieAvroDataBlock) block; + // TODO If we can store additional metadata in datablock, we can skip parsing records + // (such as startTime, endTime of records in the block) + try (ClosableIterator itr = avroBlock.getRecordIterator()) { + StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true) + // Filter blocks in desired time window + .filter(r -> commitsFilter.apply((GenericRecord) r)) + .map(r -> readCommit((GenericRecord) r, loadInstantDetails)) + .filter(c -> filter == null || filter.isInRange(c)) + .forEach(instantsInRange::add); + } } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java 
b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java index 2cf111e91c812..ac1dd007d0527 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java @@ -75,7 +75,8 @@ public void setInstants(List instants) { * * @deprecated */ - public HoodieDefaultTimeline() {} + public HoodieDefaultTimeline() { + } @Override public HoodieTimeline filterInflights() { @@ -112,6 +113,16 @@ public HoodieDefaultTimeline getWriteTimeline() { return new HoodieDefaultTimeline(instants.stream().filter(s -> validActions.contains(s.getAction())), details); } + @Override + public HoodieTimeline getContiguousCompletedWriteTimeline() { + Option earliestPending = getWriteTimeline().filterInflightsAndRequested().firstInstant(); + if (earliestPending.isPresent()) { + return getWriteTimeline().filterCompletedInstants() + .filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), LESSER_THAN, earliestPending.get().getTimestamp())); + } + return getWriteTimeline().filterCompletedInstants(); + } + @Override public HoodieTimeline getCompletedReplaceTimeline() { return new HoodieDefaultTimeline( @@ -181,6 +192,16 @@ public HoodieTimeline filter(Predicate filter) { return new HoodieDefaultTimeline(instants.stream().filter(filter), details); } + @Override + public HoodieTimeline filterPendingIndexTimeline() { + return new HoodieDefaultTimeline(instants.stream().filter(s -> s.getAction().equals(INDEXING_ACTION) && !s.isCompleted()), details); + } + + @Override + public HoodieTimeline filterCompletedIndexTimeline() { + return new HoodieDefaultTimeline(instants.stream().filter(s -> s.getAction().equals(INDEXING_ACTION) && s.isCompleted()), details); + } + /** * Get all instants (commits, delta commits) that produce new data, in the active timeline. 
*/ @@ -189,12 +210,12 @@ public HoodieTimeline getCommitsTimeline() { } /** - * Get all instants (commits, delta commits, compaction, clean, savepoint, rollback) that result in actions, + * Get all instants (commits, delta commits, compaction, clean, savepoint, rollback, replace commits, index) that result in actions, * in the active timeline. */ public HoodieTimeline getAllCommitsTimeline() { return getTimelineOfActions(CollectionUtils.createSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, - CLEAN_ACTION, COMPACTION_ACTION, SAVEPOINT_ACTION, ROLLBACK_ACTION, REPLACE_COMMIT_ACTION)); + CLEAN_ACTION, COMPACTION_ACTION, SAVEPOINT_ACTION, ROLLBACK_ACTION, REPLACE_COMMIT_ACTION, INDEXING_ACTION)); } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java index 9cd0883126495..8b1cb875c09f6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java @@ -172,6 +172,14 @@ public String getFileName() { return isInflight() ? HoodieTimeline.makeInflightReplaceFileName(timestamp) : isRequested() ? HoodieTimeline.makeRequestedReplaceFileName(timestamp) : HoodieTimeline.makeReplaceFileName(timestamp); + } else if (HoodieTimeline.INDEXING_ACTION.equals(action)) { + return isInflight() ? HoodieTimeline.makeInflightIndexFileName(timestamp) + : isRequested() ? HoodieTimeline.makeRequestedIndexFileName(timestamp) + : HoodieTimeline.makeIndexCommitFileName(timestamp); + } else if (HoodieTimeline.SCHEMA_COMMIT_ACTION.equals(action)) { + return isInflight() ? HoodieTimeline.makeInflightSchemaFileName(timestamp) + : isRequested() ? 
HoodieTimeline.makeRequestSchemaFileName(timestamp) + : HoodieTimeline.makeSchemaFileName(timestamp); } throw new IllegalArgumentException("Cannot get file name for unknown action " + action); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java index 25b9c2ec6f2e4..c3fbd97312c0e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java @@ -55,10 +55,13 @@ public interface HoodieTimeline extends Serializable { String COMPACTION_ACTION = "compaction"; String REQUESTED_EXTENSION = ".requested"; String RESTORE_ACTION = "restore"; + String INDEXING_ACTION = "indexing"; + // only for schema save + String SCHEMA_COMMIT_ACTION = "schemacommit"; String[] VALID_ACTIONS_IN_TIMELINE = {COMMIT_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, SAVEPOINT_ACTION, RESTORE_ACTION, ROLLBACK_ACTION, - COMPACTION_ACTION, REPLACE_COMMIT_ACTION}; + COMPACTION_ACTION, REPLACE_COMMIT_ACTION, INDEXING_ACTION}; String COMMIT_EXTENSION = "." + COMMIT_ACTION; String DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION; @@ -84,6 +87,12 @@ public interface HoodieTimeline extends Serializable { String INFLIGHT_REPLACE_COMMIT_EXTENSION = "." + REPLACE_COMMIT_ACTION + INFLIGHT_EXTENSION; String REQUESTED_REPLACE_COMMIT_EXTENSION = "." + REPLACE_COMMIT_ACTION + REQUESTED_EXTENSION; String REPLACE_COMMIT_EXTENSION = "." + REPLACE_COMMIT_ACTION; + String INFLIGHT_INDEX_COMMIT_EXTENSION = "." + INDEXING_ACTION + INFLIGHT_EXTENSION; + String REQUESTED_INDEX_COMMIT_EXTENSION = "." + INDEXING_ACTION + REQUESTED_EXTENSION; + String INDEX_COMMIT_EXTENSION = "." + INDEXING_ACTION; + String SAVE_SCHEMA_ACTION_EXTENSION = "." + SCHEMA_COMMIT_ACTION; + String INFLIGHT_SAVE_SCHEMA_ACTION_EXTENSION = "." 
+ SCHEMA_COMMIT_ACTION + INFLIGHT_EXTENSION; + String REQUESTED_SAVE_SCHEMA_ACTION_EXTENSION = "." + SCHEMA_COMMIT_ACTION + REQUESTED_EXTENSION; String INVALID_INSTANT_TS = "0"; @@ -139,6 +148,15 @@ public interface HoodieTimeline extends Serializable { */ HoodieTimeline getWriteTimeline(); + /** + * Timeline to just include commits (commit/deltacommit), compaction and replace actions that are completed and contiguous. + * For example, if timeline is [C0.completed, C1.completed, C2.completed, C3.inflight, C4.completed]. + * Then, a timeline of [C0.completed, C1.completed, C2.completed] will be returned. + * + * @return + */ + HoodieTimeline getContiguousCompletedWriteTimeline(); + /** * Timeline to just include replace instants that have valid (commit/deltacommit) actions. * @@ -198,6 +216,16 @@ public interface HoodieTimeline extends Serializable { */ HoodieTimeline filter(Predicate filter); + /** + * Filter this timeline to just include requested and inflight index instants. + */ + HoodieTimeline filterPendingIndexTimeline(); + + /** + * Filter this timeline to just include completed index instants. + */ + HoodieTimeline filterCompletedIndexTimeline(); + /** * If the timeline has any instants. * @@ -341,6 +369,14 @@ static HoodieInstant getRollbackRequestedInstant(HoodieInstant instant) { return instant.isRequested() ? instant : HoodieTimeline.getRequestedInstant(instant); } + static HoodieInstant getIndexRequestedInstant(final String timestamp) { + return new HoodieInstant(State.REQUESTED, INDEXING_ACTION, timestamp); + } + + static HoodieInstant getIndexInflightInstant(final String timestamp) { + return new HoodieInstant(State.INFLIGHT, INDEXING_ACTION, timestamp); + } + /** * Returns the inflight instant corresponding to the instant being passed. Takes care of changes in action names * between inflight and completed instants (compaction <=> commit). 
@@ -454,4 +490,28 @@ static String makeFileNameAsComplete(String fileName) { static String makeFileNameAsInflight(String fileName) { return StringUtils.join(fileName, HoodieTimeline.INFLIGHT_EXTENSION); } + + static String makeIndexCommitFileName(String instant) { + return StringUtils.join(instant, HoodieTimeline.INDEX_COMMIT_EXTENSION); + } + + static String makeInflightIndexFileName(String instant) { + return StringUtils.join(instant, HoodieTimeline.INFLIGHT_INDEX_COMMIT_EXTENSION); + } + + static String makeRequestedIndexFileName(String instant) { + return StringUtils.join(instant, HoodieTimeline.REQUESTED_INDEX_COMMIT_EXTENSION); + } + + static String makeSchemaFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.SAVE_SCHEMA_ACTION_EXTENSION); + } + + static String makeInflightSchemaFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.INFLIGHT_SAVE_SCHEMA_ACTION_EXTENSION); + } + + static String makeRequestSchemaFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.REQUESTED_SAVE_SCHEMA_ACTION_EXTENSION); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineMetadataUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineMetadataUtils.java index 70a23f1b4c0fb..b50846b8780bf 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineMetadataUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineMetadataUtils.java @@ -22,6 +22,8 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; +import org.apache.hudi.avro.model.HoodieIndexCommitMetadata; +import org.apache.hudi.avro.model.HoodieIndexPlan; import org.apache.hudi.avro.model.HoodieInstantInfo; import org.apache.hudi.avro.model.HoodieReplaceCommitMetadata; import 
org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; @@ -137,6 +139,14 @@ public static Option serializeRequestedReplaceMetadata(HoodieRequestedRe return serializeAvroMetadata(clusteringPlan, HoodieRequestedReplaceMetadata.class); } + public static Option serializeIndexPlan(HoodieIndexPlan indexPlan) throws IOException { + return serializeAvroMetadata(indexPlan, HoodieIndexPlan.class); + } + + public static Option serializeIndexCommitMetadata(HoodieIndexCommitMetadata indexCommitMetadata) throws IOException { + return serializeAvroMetadata(indexCommitMetadata, HoodieIndexCommitMetadata.class); + } + public static Option serializeAvroMetadata(T metadata, Class clazz) throws IOException { DatumWriter datumWriter = new SpecificDatumWriter<>(clazz); @@ -180,6 +190,14 @@ public static HoodieReplaceCommitMetadata deserializeHoodieReplaceMetadata(byte[ return deserializeAvroMetadata(bytes, HoodieReplaceCommitMetadata.class); } + public static HoodieIndexPlan deserializeIndexPlan(byte[] bytes) throws IOException { + return deserializeAvroMetadata(bytes, HoodieIndexPlan.class); + } + + public static HoodieIndexCommitMetadata deserializeIndexCommitMetadata(byte[] bytes) throws IOException { + return deserializeAvroMetadata(bytes, HoodieIndexCommitMetadata.class); + } + public static T deserializeAvroMetadata(byte[] bytes, Class clazz) throws IOException { DatumReader reader = new SpecificDatumReader<>(clazz); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/clean/CleanPlanV1MigrationHandler.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/clean/CleanPlanV1MigrationHandler.java index 0010aa21fb1c1..66fdfeb62c207 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/clean/CleanPlanV1MigrationHandler.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/clean/CleanPlanV1MigrationHandler.java @@ -18,6 +18,7 @@ package 
org.apache.hudi.common.table.timeline.versioning.clean; +import java.util.ArrayList; import java.util.HashMap; import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -61,6 +62,6 @@ public HoodieCleanerPlan downgradeFrom(HoodieCleanerPlan plan) { .collect(Collectors.toList())); }).collect(Collectors.toMap(Pair::getKey, Pair::getValue)); return new HoodieCleanerPlan(plan.getEarliestInstantToRetain(), plan.getPolicy(), filesPerPartition, VERSION, - new HashMap<>()); + new HashMap<>(), new ArrayList<>()); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/clean/CleanPlanV2MigrationHandler.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/clean/CleanPlanV2MigrationHandler.java index e141e9a15499f..fd82109bd4529 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/clean/CleanPlanV2MigrationHandler.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/clean/CleanPlanV2MigrationHandler.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -53,7 +54,7 @@ public HoodieCleanerPlan upgradeFrom(HoodieCleanerPlan plan) { new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), e.getKey()), v).toString(), false)) .collect(Collectors.toList()))).collect(Collectors.toMap(Pair::getKey, Pair::getValue)); return new HoodieCleanerPlan(plan.getEarliestInstantToRetain(), plan.getPolicy(), new HashMap<>(), VERSION, - filePathsPerPartition); + filePathsPerPartition, new ArrayList<>()); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewManager.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewManager.java index 32c7125e309ea..4683fd6919ab4 100644 --- 
a/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewManager.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewManager.java @@ -162,7 +162,7 @@ private static HoodieTableFileSystemView createInMemoryFileSystemView(HoodieMeta HoodieTimeline timeline = metaClient.getActiveTimeline().filterCompletedAndCompactionInstants(); if (metadataConfig.enabled()) { ValidationUtils.checkArgument(metadataSupplier != null, "Metadata supplier is null. Cannot instantiate metadata file system view"); - return new HoodieMetadataFileSystemView(metaClient, metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(), + return new HoodieMetadataFileSystemView(metaClient, metaClient.getActiveTimeline().filterCompletedAndCompactionInstants(), metadataSupplier.get()); } return new HoodieTableFileSystemView(metaClient, timeline, viewConf.isIncrementalTimelineSyncEnabled()); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java index 299dbab10c368..9dac36081384b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java @@ -18,9 +18,9 @@ package org.apache.hudi.common.table.view; -import org.apache.hadoop.fs.FileStatus; import org.apache.hudi.common.model.BootstrapBaseFileMapping; import org.apache.hudi.common.model.CompactionOperation; +import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieFileGroup; import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -29,6 +29,8 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; + +import 
org.apache.hadoop.fs.FileStatus; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -358,6 +360,19 @@ protected Option getReplaceInstant(final HoodieFileGroupId fileGr return Option.ofNullable(fgIdToReplaceInstants.get(fileGroupId)); } + /** + * Get the latest file slices for a given partition including the inflight ones. + * + * @param partitionPath + * @return Stream of latest {@link FileSlice} in the partition path. + */ + public Stream fetchLatestFileSlicesIncludingInflight(String partitionPath) { + return fetchAllStoredFileGroups(partitionPath) + .map(HoodieFileGroup::getLatestFileSlicesIncludingInflight) + .filter(Option::isPresent) + .map(Option::get); + } + @Override public void close() { super.close(); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java index 7ec6110d723ab..d6391d178eb32 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java @@ -18,14 +18,6 @@ package org.apache.hudi.common.util; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.HoodieAvroWriteSupport; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; @@ -36,6 +28,16 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.keygen.BaseKeyGenerator; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + public abstract class BaseFileUtils { public 
static BaseFileUtils getInstance(String path) { @@ -204,4 +206,9 @@ public abstract Map readFooter(Configuration configuration, bool * @return The Avro schema of the data file */ public abstract Schema readAvroSchema(Configuration configuration, Path filePath); + + /** + * @return The subclass's {@link HoodieFileFormat}. + */ + public abstract HoodieFileFormat getFormat(); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/CleanerUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/CleanerUtils.java index a3a1305667f6a..df4e9ac402c6d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/CleanerUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/CleanerUtils.java @@ -64,13 +64,13 @@ public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime, for (HoodieCleanStat stat : cleanStats) { HoodieCleanPartitionMetadata metadata = new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(), - stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles()); + stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles(), stat.isPartitionDeleted()); partitionMetadataMap.put(stat.getPartitionPath(), metadata); if ((null != stat.getDeleteBootstrapBasePathPatterns()) && (!stat.getDeleteBootstrapBasePathPatterns().isEmpty())) { HoodieCleanPartitionMetadata bootstrapMetadata = new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(), stat.getDeleteBootstrapBasePathPatterns(), stat.getSuccessDeleteBootstrapBaseFiles(), - stat.getFailedDeleteBootstrapBaseFiles()); + stat.getFailedDeleteBootstrapBaseFiles(), stat.isPartitionDeleted()); partitionBootstrapMetadataMap.put(stat.getPartitionPath(), bootstrapMetadata); } totalDeleted += stat.getSuccessDeleteFiles().size(); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java 
b/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java index 9741ceef3ede3..9040a04d5edcf 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java @@ -22,6 +22,7 @@ import java.lang.reflect.Array; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -31,13 +32,35 @@ import java.util.Objects; import java.util.Properties; import java.util.Set; +import java.util.Spliterator; +import java.util.Spliterators; import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; +import java.util.stream.StreamSupport; public class CollectionUtils { public static final Properties EMPTY_PROPERTIES = new Properties(); + public static boolean isNullOrEmpty(Collection c) { + return Objects.isNull(c) || c.isEmpty(); + } + + public static boolean nonEmpty(Collection c) { + return !isNullOrEmpty(c); + } + + /** + * Collects provided {@link Iterator} to a {@link Stream} + */ + public static Stream toStream(Iterator iterator) { + return StreamSupport.stream( + Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), + false + ); + } + /** * Combines provided arrays into one */ @@ -105,6 +128,21 @@ public static List diff(List one, List another) { return diff; } + public static Stream> batchesAsStream(List list, int batchSize) { + ValidationUtils.checkArgument(batchSize > 0, "batch size must be positive."); + int total = list.size(); + if (total <= 0) { + return Stream.empty(); + } + int numFullBatches = (total - 1) / batchSize; + return IntStream.range(0, numFullBatches + 1).mapToObj( + n -> list.subList(n * batchSize, n == numFullBatches ? 
total : (n + 1) * batchSize)); + } + + public static List> batches(List list, int batchSize) { + return batchesAsStream(list, batchSize).collect(Collectors.toList()); + } + /** * Determines whether two iterators contain equal elements in the same order. More specifically, * this method returns {@code true} if {@code iterator1} and {@code iterator2} contain the same diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/CommitUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/CommitUtils.java index 9970687abb4f2..08b775f60ee95 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/CommitUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/CommitUtils.java @@ -97,7 +97,7 @@ private static HoodieCommitMetadata buildMetadataFromStats(List String commitActionType, WriteOperationType operationType) { final HoodieCommitMetadata commitMetadata; - if (commitActionType == HoodieTimeline.REPLACE_COMMIT_ACTION) { + if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(commitActionType)) { HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata(); replaceMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds); commitMetadata = replaceMetadata; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/DateTimeUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/DateTimeUtils.java index 531a0903f66fc..cf90eff8d6185 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/DateTimeUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/DateTimeUtils.java @@ -39,6 +39,35 @@ public class DateTimeUtils { private static final Map LABEL_TO_UNIT_MAP = Collections.unmodifiableMap(initMap()); + /** + * Converts provided microseconds (from epoch) to {@link Instant} + */ + public static Instant microsToInstant(long microsFromEpoch) { + long epochSeconds = microsFromEpoch / (1_000_000L); + long nanoAdjustment = (microsFromEpoch % (1_000_000L)) * 1_000L; + + return 
Instant.ofEpochSecond(epochSeconds, nanoAdjustment); + } + + /** + * Converts provided {@link Instant} to microseconds (from epoch) + */ + public static long instantToMicros(Instant instant) { + long seconds = instant.getEpochSecond(); + int nanos = instant.getNano(); + + if (seconds < 0 && nanos > 0) { + long micros = Math.multiplyExact(seconds + 1, 1_000_000L); + long adjustment = (nanos / 1_000L) - 1_000_000; + + return Math.addExact(micros, adjustment); + } else { + long micros = Math.multiplyExact(seconds, 1_000_000L); + + return Math.addExact(micros, nanos / 1_000L); + } + } + /** * Parse input String to a {@link java.time.Instant}. * diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java b/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java new file mode 100644 index 0000000000000..1d2786197780c --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.common.util; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; + +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager; +import org.apache.hudi.internal.schema.utils.InternalSchemaUtils; +import org.apache.hudi.internal.schema.utils.SerDeHelper; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.TreeMap; +import java.util.stream.Collectors; + +public class InternalSchemaCache { + private static final Logger LOG = LogManager.getLogger(InternalSchemaCache.class); + // Use segment lock to reduce competition. + // the lock size should be powers of 2 for better hash. + private static Object[] lockList = new Object[16]; + + static { + for (int i = 0; i < lockList.length; i++) { + lockList[i] = new Object(); + } + } + + // historySchemas cache maintain a map about (tablePath, HistorySchemas). + // this is a Global cache, all threads in one container/executor share the same cache. + private static final Cache> + HISTORICAL_SCHEMA_CACHE = Caffeine.newBuilder().maximumSize(1000).weakValues().build(); + + /** + * Search internalSchema based on versionID. 
+ * first step: try to get internalSchema from hoodie commit files; there is no need to take a lock. + * if we cannot get internalSchema by the first step, then we try to get internalSchema from cache. + * + * @param versionID schema version_id that needs to be searched + * @param metaClient current hoodie metaClient + * @return internalSchema + */ + public static InternalSchema searchSchemaAndCache(long versionID, HoodieTableMetaClient metaClient, boolean cacheEnable) { + Option candidateSchema = getSchemaByReadingCommitFile(versionID, metaClient); + if (candidateSchema.isPresent()) { + return candidateSchema.get(); + } + if (!cacheEnable) { + // parse history schema and return directly + return InternalSchemaUtils.searchSchema(versionID, getHistoricalSchemas(metaClient)); + } + String tablePath = metaClient.getBasePath(); + // use segment lock to reduce competition. + synchronized (lockList[tablePath.hashCode() & (lockList.length - 1)]) { + TreeMap historicalSchemas = HISTORICAL_SCHEMA_CACHE.getIfPresent(tablePath); + if (historicalSchemas == null || InternalSchemaUtils.searchSchema(versionID, historicalSchemas) == null) { + historicalSchemas = getHistoricalSchemas(metaClient); + HISTORICAL_SCHEMA_CACHE.put(tablePath, historicalSchemas); + } else { + long maxVersionId = historicalSchemas.keySet().stream().max(Long::compareTo).get(); + if (versionID > maxVersionId) { + historicalSchemas = getHistoricalSchemas(metaClient); + HISTORICAL_SCHEMA_CACHE.put(tablePath, historicalSchemas); + } + } + return InternalSchemaUtils.searchSchema(versionID, historicalSchemas); + } + } + + private static TreeMap getHistoricalSchemas(HoodieTableMetaClient metaClient) { + TreeMap result = new TreeMap<>(); + FileBasedInternalSchemaStorageManager schemasManager = new FileBasedInternalSchemaStorageManager(metaClient); + String historySchemaStr = schemasManager.getHistorySchemaStr(); + if (!StringUtils.isNullOrEmpty(historySchemaStr)) { + result = SerDeHelper.parseSchemas(historySchemaStr); + } + return
result; + } + + private static Option getSchemaByReadingCommitFile(long versionID, HoodieTableMetaClient metaClient) { + try { + HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); + List instants = timeline.getInstants().filter(f -> f.getTimestamp().equals(String.valueOf(versionID))).collect(Collectors.toList()); + if (instants.isEmpty()) { + return Option.empty(); + } + byte[] data = timeline.getInstantDetails(instants.get(0)).get(); + HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class); + String latestInternalSchemaStr = metadata.getMetadata(SerDeHelper.LATEST_SCHEMA); + return SerDeHelper.fromJson(latestInternalSchemaStr); + } catch (Exception e) { + throw new HoodieException("Failed to read schema from commit metadata", e); + } + } + + /** + * Get internalSchema and avroSchema for compaction/cluster operation. + * + * @param metaClient current hoodie metaClient + * @param compactionAndClusteringInstant first instant before current compaction/cluster instant + * @return (internalSchemaStrOpt, avroSchemaStrOpt) a pair of InternalSchema/avroSchema + */ + public static Pair, Option> getInternalSchemaAndAvroSchemaForClusteringAndCompaction(HoodieTableMetaClient metaClient, String compactionAndClusteringInstant) { + // try to load internalSchema to support Schema Evolution + HoodieTimeline timelineBeforeCurrentCompaction = metaClient.getCommitsAndCompactionTimeline().findInstantsBefore(compactionAndClusteringInstant).filterCompletedInstants(); + Option lastInstantBeforeCurrentCompaction = timelineBeforeCurrentCompaction.lastInstant(); + if (lastInstantBeforeCurrentCompaction.isPresent()) { + // try to find internalSchema + byte[] data = timelineBeforeCurrentCompaction.getInstantDetails(lastInstantBeforeCurrentCompaction.get()).get(); + HoodieCommitMetadata metadata; + try { + metadata = HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class); + } catch 
(Exception e) { + throw new HoodieException(String.format("cannot read metadata from commit: %s", lastInstantBeforeCurrentCompaction.get()), e); + } + String internalSchemaStr = metadata.getMetadata(SerDeHelper.LATEST_SCHEMA); + if (internalSchemaStr != null) { + String existingSchemaStr = metadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY); + return Pair.of(Option.of(internalSchemaStr), Option.of(existingSchemaStr)); + } + } + return Pair.of(Option.empty(), Option.empty()); + } + + /** + * Give a schema versionId return its internalSchema. + * This method will be called by spark tasks, we should minimize time cost. + * We try our best to not use metaClient, since the initialization of metaClient is costly. + * step1: + * try to parse internalSchema from HoodieInstant directly + * step2: + * if we cannot parse internalSchema in step1, + * try to find internalSchema in historySchema. + * + * @param versionId the internalSchema version to be searched. + * @param tablePath table path + * @param hadoopConf conf + * @param validCommits currently valid commits, used to make up the commit file path/verify the validity of the history schema files + * @return an internalSchema.
+ */ + public static InternalSchema getInternalSchemaByVersionId(long versionId, String tablePath, Configuration hadoopConf, String validCommits) { + Set commitSet = Arrays.stream(validCommits.split(",")).collect(Collectors.toSet()); + List validateCommitList = commitSet.stream().map(fileName -> { + String fileExtension = HoodieInstant.getTimelineFileExtension(fileName); + return fileName.replace(fileExtension, ""); + }).collect(Collectors.toList()); + + FileSystem fs = FSUtils.getFs(tablePath, hadoopConf); + Path hoodieMetaPath = new Path(tablePath, HoodieTableMetaClient.METAFOLDER_NAME); + //step1: + Path candidateCommitFile = commitSet.stream().filter(fileName -> { + String fileExtension = HoodieInstant.getTimelineFileExtension(fileName); + return fileName.replace(fileExtension, "").equals(versionId + ""); + }).findFirst().map(f -> new Path(hoodieMetaPath, f)).orElse(null); + if (candidateCommitFile != null) { + try { + byte[] data; + try (FSDataInputStream is = fs.open(candidateCommitFile)) { + data = FileIOUtils.readAsByteArray(is); + } catch (IOException e) { + throw e; + } + HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class); + String latestInternalSchemaStr = metadata.getMetadata(SerDeHelper.LATEST_SCHEMA); + if (latestInternalSchemaStr != null) { + return SerDeHelper.fromJson(latestInternalSchemaStr).orElse(null); + } + } catch (Exception e1) { + // swallow this exception. + LOG.warn(String.format("Cannot find internal schema from commit file %s. 
Falling back to parsing historical internal schema", candidateCommitFile.toString())); + } + } + // step2: + FileBasedInternalSchemaStorageManager fileBasedInternalSchemaStorageManager = new FileBasedInternalSchemaStorageManager(hadoopConf, new Path(tablePath)); + String latestHistorySchema = fileBasedInternalSchemaStorageManager.getHistorySchemaStrByGivenValidCommits(validateCommitList); + return InternalSchemaUtils.searchSchema(versionId, SerDeHelper.parseSchemas(latestHistorySchema)); + } +} + diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/MapUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/MapUtils.java new file mode 100644 index 0000000000000..c39f6fd74f424 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/MapUtils.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.hudi.common.util; + +import java.util.Map; +import java.util.Objects; + +public class MapUtils { + + public static boolean isNullOrEmpty(Map m) { + return Objects.isNull(m) || m.isEmpty(); + } + + public static boolean nonEmpty(Map m) { + return !isNullOrEmpty(m); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/NetworkUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/NetworkUtils.java index 29c42e3ea11ce..329c1090ea3bd 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/NetworkUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/NetworkUtils.java @@ -22,11 +22,14 @@ import java.io.IOException; import java.net.DatagramSocket; +import java.net.Inet4Address; import java.net.InetAddress; -import java.net.InterfaceAddress; import java.net.NetworkInterface; import java.net.SocketException; -import java.util.Enumeration; +import java.net.UnknownHostException; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; /** * A utility class for network. @@ -34,28 +37,51 @@ public class NetworkUtils { public static synchronized String getHostname() { + InetAddress localAddress; + try (DatagramSocket s = new DatagramSocket()) { + // see https://stackoverflow.com/questions/9481865/getting-the-ip-address-of-the-current-machine-using-java + // for details. 
+ s.connect(InetAddress.getByName("8.8.8.8"), 10002); + localAddress = s.getLocalAddress(); + if (validAddress(localAddress)) { + return localAddress.getHostAddress(); + } + } catch (IOException e) { + throw new HoodieException("Unable to find server port", e); + } + + // fallback try { - Enumeration networkInterfaceEnumeration = NetworkInterface.getNetworkInterfaces(); - while (networkInterfaceEnumeration.hasMoreElements()) { - for (InterfaceAddress interfaceAddress : networkInterfaceEnumeration.nextElement().getInterfaceAddresses()) { - InetAddress address = interfaceAddress.getAddress(); - if (!address.isLinkLocalAddress() && !address.isLoopbackAddress() && !address.isAnyLocalAddress()) { - return address.getHostAddress(); + List activeNetworkIFs = Collections.list(NetworkInterface.getNetworkInterfaces()); + // On unix-like system, getNetworkInterfaces returns ifs in reverse order + // compared to ifconfig output order, + // pick ip address following system output order. + Collections.reverse(activeNetworkIFs); + for (NetworkInterface ni : activeNetworkIFs) { + List addresses = Collections.list(ni.getInetAddresses()).stream() + .filter(NetworkUtils::validAddress) + .collect(Collectors.toList()); + if (addresses.size() > 0) { + // IPv4 has higher priority + InetAddress address = addresses.stream() + .filter(addr -> addr instanceof Inet4Address).findAny() + .orElse(addresses.get(0)); + try { + // Inet6Address.toHostName may add interface at the end if it knows about it + return InetAddress.getByAddress(address.getAddress()).getHostAddress(); + } catch (UnknownHostException e) { + throw new HoodieException("Unable to fetch raw IP address for: " + address); } } } + + return localAddress.getHostAddress(); } catch (SocketException e) { throw new HoodieException("Unable to find server port", e); } + } - // fallback - try (DatagramSocket s = new DatagramSocket()) { - // see 
https://stackoverflow.com/questions/9481865/getting-the-ip-address-of-the-current-machine-using-java - // for details. - s.connect(InetAddress.getByName("8.8.8.8"), 10002); - return s.getLocalAddress().getHostAddress(); - } catch (IOException e) { - throw new HoodieException("Unable to find server port", e); - } + private static boolean validAddress(InetAddress address) { + return !(address.isLinkLocalAddress() || address.isLoopbackAddress() || address.isAnyLocalAddress()); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java index 88c28d75204a7..0cc40591972a0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java @@ -18,37 +18,39 @@ package org.apache.hudi.common.util; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieKey; -import org.apache.orc.storage.ql.exec.vector.BytesColumnVector; -import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.MetadataNotFoundException; import org.apache.hudi.keygen.BaseKeyGenerator; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import 
org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.orc.OrcFile; import org.apache.orc.OrcProto.UserMetadataItem; import org.apache.orc.Reader; import org.apache.orc.Reader.Options; import org.apache.orc.RecordReader; import org.apache.orc.TypeDescription; +import org.apache.orc.storage.ql.exec.vector.BytesColumnVector; +import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; /** * Utility functions for ORC files. @@ -248,6 +250,11 @@ public Schema readAvroSchema(Configuration conf, Path orcFilePath) { } } + @Override + public HoodieFileFormat getFormat() { + return HoodieFileFormat.ORC; + } + @Override public long getRowCount(Configuration conf, Path orcFilePath) { try (Reader reader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf))) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java index e74f4f77703d0..c779a3269a12a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java @@ -21,6 +21,7 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieColumnRangeMetadata; +import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.exception.HoodieIOException; @@ -58,6 +59,7 @@ import java.util.Map; import java.util.Set; import java.util.function.Function; +import java.util.stream.Collector; import java.util.stream.Collectors; import 
java.util.stream.Stream; @@ -227,6 +229,11 @@ public Schema readAvroSchema(Configuration configuration, Path parquetFilePath) return new AvroSchemaConverter(configuration).convert(parquetSchema); } + @Override + public HoodieFileFormat getFormat() { + return HoodieFileFormat.PARQUET; + } + /** * NOTE: This literally reads the entire file contents, thus should be used with caution. */ @@ -288,18 +295,27 @@ public Boolean apply(String recordKey) { /** * Parse min/max statistics stored in parquet footers for all columns. */ + @SuppressWarnings("rawtype") public List> readRangeFromParquetMetadata( @Nonnull Configuration conf, @Nonnull Path parquetFilePath, @Nonnull List cols ) { ParquetMetadata metadata = readMetadata(conf, parquetFilePath); + + // NOTE: This collector has to have fully specialized generic type params since + // Java 1.8 struggles to infer them + Collector, ?, Map>>> groupingByCollector = + Collectors.groupingBy(HoodieColumnRangeMetadata::getColumnName); + // Collect stats from all individual Parquet blocks - Map>> columnToStatsListMap = metadata.getBlocks().stream().sequential() - .flatMap(blockMetaData -> blockMetaData.getColumns().stream() - .filter(f -> cols.contains(f.getPath().toDotString())) + Map>> columnToStatsListMap = + (Map>>) metadata.getBlocks().stream().sequential() + .flatMap(blockMetaData -> + blockMetaData.getColumns().stream() + .filter(f -> cols.contains(f.getPath().toDotString())) .map(columnChunkMetaData -> - new HoodieColumnRangeMetadata( + HoodieColumnRangeMetadata.create( parquetFilePath.getName(), columnChunkMetaData.getPath().toDotString(), convertToNativeJavaType( @@ -312,7 +328,8 @@ public List> readRangeFromParquetMetadata( columnChunkMetaData.getValueCount(), columnChunkMetaData.getTotalSize(), columnChunkMetaData.getTotalUncompressedSize())) - ).collect(Collectors.groupingBy(HoodieColumnRangeMetadata::getColumnName)); + ) + .collect(groupingByCollector); // Combine those into file-level statistics // NOTE: Inlining this 
var makes javac (1.8) upset (due to its inability to infer @@ -360,7 +377,7 @@ private > HoodieColumnRangeMetadata combineRanges( maxValue = one.getMaxValue(); } - return new HoodieColumnRangeMetadata( + return HoodieColumnRangeMetadata.create( one.getFilePath(), one.getColumnName(), minValue, maxValue, one.getNullCount() + another.getNullCount(), @@ -369,7 +386,11 @@ private > HoodieColumnRangeMetadata combineRanges( one.getTotalUncompressedSize() + another.getTotalUncompressedSize()); } - private static Comparable convertToNativeJavaType(PrimitiveType primitiveType, Comparable val) { + private static Comparable convertToNativeJavaType(PrimitiveType primitiveType, Comparable val) { + if (val == null) { + return null; + } + if (primitiveType.getOriginalType() == OriginalType.DECIMAL) { return extractDecimal(val, primitiveType.getDecimalMetadata()); } else if (primitiveType.getOriginalType() == OriginalType.DATE) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java index bc48661c88376..a4ef09641d50c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java @@ -173,4 +173,11 @@ private static List findClasses(File directory, String packageName) { } return classes; } + + /** + * Returns whether the given two comparable values come from the same runtime class. 
+ */ + public static boolean isSameClass(Comparable v, Comparable o) { + return v.getClass() == o.getClass(); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java index 9ded415438a86..d4bafd9c9feee 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java @@ -161,9 +161,9 @@ private static Object getPreCombineVal(GenericRecord rec, String preCombineField /** * Utility method to convert bytes to HoodieRecord using schema and payload class. */ - public static R generateEmptyPayload(String recKey, String partitionPath, String payloadClazz) { + public static R generateEmptyPayload(String recKey, String partitionPath, Comparable orderingVal, String payloadClazz) { HoodieRecord hoodieRecord = new HoodieAvroRecord<>(new HoodieKey(recKey, partitionPath), - ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.empty()}, Option.class)); + ReflectionUtils.loadPayload(payloadClazz, new Object[] {null, orderingVal}, GenericRecord.class, Comparable.class)); return (R) hoodieRecord; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/StringUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/StringUtils.java index 326bf05277f0b..3e1a1a9cc7f5e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/StringUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/StringUtils.java @@ -19,6 +19,10 @@ package org.apache.hudi.common.util; import javax.annotation.Nullable; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Simple utility for operations on strings. @@ -46,7 +50,7 @@ public class StringUtils { * */ public static String join(final String... 
elements) { - return join(elements, ""); + return join(elements, EMPTY_STRING); } public static String joinUsingDelim(String delim, final String... elements) { @@ -100,4 +104,15 @@ public static String objToString(@Nullable Object obj) { private static boolean stringIsNullOrEmpty(@Nullable String string) { return string == null || string.isEmpty(); } + + /** + * Splits input string, delimited {@code delimiter} into a list of non-empty strings + * (skipping any empty string produced during splitting) + */ + public static List split(@Nullable String input, String delimiter) { + if (isNullOrEmpty(input)) { + return Collections.emptyList(); + } + return Stream.of(input.split(delimiter)).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/HoodieIncompatibleSchemaException.java b/hudi-common/src/main/java/org/apache/hudi/exception/HoodieIncompatibleSchemaException.java index 579ae21d3ed99..a739af67909b0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/exception/HoodieIncompatibleSchemaException.java +++ b/hudi-common/src/main/java/org/apache/hudi/exception/HoodieIncompatibleSchemaException.java @@ -21,7 +21,7 @@ /** * Exception for incompatible schema. */ -public class HoodieIncompatibleSchemaException extends Exception { +public class HoodieIncompatibleSchemaException extends RuntimeException { public HoodieIncompatibleSchemaException(String msg, Throwable e) { super(msg, e); diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/HoodieSchemaException.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/HoodieSchemaException.java new file mode 100644 index 0000000000000..7fdafc5238d10 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/HoodieSchemaException.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema; + +import org.apache.hudi.exception.HoodieException; + +/** + * Exception thrown for Hoodie schema convert failures. The root of the exception hierarchy. + * Hoodie Write/Read clients will throw this exception if any of its operations fail. This is a runtime (unchecked) + * exception. + */ +public class HoodieSchemaException extends HoodieException { + public HoodieSchemaException() { + super(); + } + + public HoodieSchemaException(String message) { + super(message); + } + + public HoodieSchemaException(String message, Throwable t) { + super(message, t); + } + + public HoodieSchemaException(Throwable t) { + super(t); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/InternalSchema.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/InternalSchema.java new file mode 100644 index 0000000000000..659612cd5cfaf --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/InternalSchema.java @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema; + +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.internal.schema.Types.Field; +import org.apache.hudi.internal.schema.Types.RecordType; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Internal schema for hudi table. + * used to support schema evolution. + */ +public class InternalSchema implements Serializable { + + private static final long DEFAULT_VERSION_ID = 0; + + private final RecordType record; + + private int maxColumnId; + private long versionId; + + private transient Map idToField = null; + private transient Map nameToId = null; + private transient Map idToName = null; + + public static InternalSchema getEmptyInternalSchema() { + return new InternalSchema(-1L, new ArrayList<>()); + } + + public boolean isEmptySchema() { + return versionId < 0; + } + + public InternalSchema(List columns) { + this(DEFAULT_VERSION_ID, columns); + } + + public InternalSchema(Field... 
columns) { + this(DEFAULT_VERSION_ID, Arrays.asList(columns)); + } + + public InternalSchema(long versionId, List cols) { + this.versionId = versionId; + this.record = RecordType.get(cols); + idToName = cols.isEmpty() ? new HashMap<>() : InternalSchemaBuilder.getBuilder().buildIdToName(record); + nameToId = cols.isEmpty() ? new HashMap<>() : idToName.entrySet().stream().collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); + maxColumnId = idToName.isEmpty() ? -1 : idToName.keySet().stream().max(Comparator.comparing(Integer::valueOf)).get(); + } + + public InternalSchema(long versionId, int maxColumnId, List cols) { + this.maxColumnId = maxColumnId; + this.versionId = versionId; + this.record = RecordType.get(cols); + buildIdToName(); + } + + public InternalSchema(long versionId, int maxColumnId, Field... cols) { + this(versionId, maxColumnId, Arrays.asList(cols)); + } + + public RecordType getRecord() { + return record; + } + + private Map buildIdToName() { + if (idToName == null) { + idToName = InternalSchemaBuilder.getBuilder().buildIdToName(record); + } + return idToName; + } + + private Map buildNameToId() { + if (nameToId == null) { + if (idToName != null && !idToName.isEmpty()) { + nameToId = idToName.entrySet().stream().collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); + return nameToId; + } + nameToId = InternalSchemaBuilder.getBuilder().buildNameToId(record); + } + return nameToId; + } + + private Map buildIdToField() { + if (idToField == null) { + idToField = InternalSchemaBuilder.getBuilder().buildIdToField(record); + } + return idToField; + } + + /** + * Get all columns full name. + */ + public List getAllColsFullName() { + if (nameToId == null) { + nameToId = InternalSchemaBuilder.getBuilder().buildNameToId(record); + } + return Arrays.asList(nameToId.keySet().toArray(new String[0])); + } + + /** + * Set the version ID for this schema. 
+ */ + public InternalSchema setSchemaId(long versionId) { + this.versionId = versionId; + return this; + } + + /** + * Returns the version ID for this schema. + */ + public long schemaId() { + return this.versionId; + } + + /** + * Set the version ID for this schema. + */ + public void setMaxColumnId(int maxColumnId) { + this.maxColumnId = maxColumnId; + } + + /** + * Returns the max column id for this schema. + */ + public int getMaxColumnId() { + return this.maxColumnId; + } + + /** + * Returns a List of the {@link Field columns} in this Schema. + */ + public List columns() { + return record.fields(); + } + + /** + * Returns the {@link Type} of a sub-field identified by the field name. + * + * @param id a field id + * @return fullName of field of + */ + public String findfullName(int id) { + if (idToName == null) { + buildIdToName(); + } + String result = idToName.get(id); + return result == null ? "" : result; + } + + /** + * Returns the {@link Type} of a sub-field identified by the field name. + * + * @param name a field name + * @return a Type for the sub-field or null if it is not found + */ + public Type findType(String name) { + if (name == null || name.isEmpty()) { + return null; + } + Integer id = buildNameToId().get(name); + if (id != null) { // name is found + return findType(id); + } + return null; + } + + /** + * Returns the {@link Type} of a sub-field identified by the field id. + * + * @param id a field id + * @return a Type for the sub-field or null if it is not found + */ + public Type findType(int id) { + Field field = buildIdToField().get(id); + if (field != null) { + return field.type(); + } + return null; + } + + /** + * Returns all field ids + */ + public Set getAllIds() { + if (idToName == null) { + buildIdToName(); + } + return idToName.keySet(); + } + + /** + * Returns the sub-field identified by the field id. 
+ * + * @param id a field id + * @return the sub-field or null if it is not found + */ + public Field findField(int id) { + return buildIdToField().get(id); + } + + /** + * Returns a sub-field by name as a {@link Field}. + * The result may be a top-level or a nested field. + * + * @param name a String name + * @return a Type for the sub-field or null if it is not found + */ + public Field findField(String name) { + if (name == null || name.isEmpty()) { + return null; + } + Integer id = buildNameToId().get(name); + if (id != null) { + return buildIdToField().get(id); + } + return null; + } + + /** + * Whether colName exists in current Schema. + * Case insensitive. + * + * @param colName a colName + * @return Whether colName exists in current Schema + */ + public boolean findDuplicateCol(String colName) { + return idToName.entrySet().stream().map(e -> e.getValue().toLowerCase(Locale.ROOT)) + .collect(Collectors.toSet()).contains(colName); + } + + public int findIdByName(String name) { + if (name == null || name.isEmpty()) { + return -1; + } + return buildNameToId().getOrDefault(name, -1); + } + + @Override + public String toString() { + return String.format("table {\n%s\n}", + StringUtils.join(record.fields().stream() + .map(f -> " " + f) + .collect(Collectors.toList()).toArray(new String[0]), "\n")); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof InternalSchema)) { + return false; + } + InternalSchema that = (InternalSchema) o; + if (versionId != that.schemaId()) { + return false; + } + return record.equals(that.record); + } + + @Override + public int hashCode() { + return record.hashCode(); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/InternalSchemaBuilder.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/InternalSchemaBuilder.java new file mode 100644 index 0000000000000..5fc86ef723958 --- /dev/null +++ 
b/hudi-common/src/main/java/org/apache/hudi/internal/schema/InternalSchemaBuilder.java @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema; + +import org.apache.hudi.internal.schema.visitor.InternalSchemaVisitor; +import org.apache.hudi.internal.schema.visitor.NameToIDVisitor; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Deque; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A build class to help build fields for InternalSchema + */ +public class InternalSchemaBuilder implements Serializable { + private static final InternalSchemaBuilder INSTANCE = new InternalSchemaBuilder(); + + public static InternalSchemaBuilder getBuilder() { + return INSTANCE; + } + + private InternalSchemaBuilder() { + } + + + /** + * Build a mapping from id to full field name for a internal Type. 
+ * if a field y belong to a struct filed x, then the full name of y is x.y + * + * @param type hoodie internal type + * @return a mapping from id to full field name + */ + public Map buildIdToName(Type type) { + Map result = new HashMap<>(); + buildNameToId(type).forEach((k, v) -> result.put(v, k)); + return result; + } + + /** + * Build a mapping from full field name to id for a internal Type. + * if a field y belong to a struct filed x, then the full name of y is x.y + * + * @param type hoodie internal type + * @return a mapping from full field name to id + */ + public Map buildNameToId(Type type) { + return visit(type, new NameToIDVisitor()); + } + + /** + * Use to traverse all types in internalSchema with visitor. + * + * @param schema hoodie internal schema + * @return vistor expected result. + */ + public T visit(InternalSchema schema, InternalSchemaVisitor visitor) { + return visitor.schema(schema, visit(schema.getRecord(), visitor)); + } + + public T visit(Type type, InternalSchemaVisitor visitor) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType) type; + List results = new ArrayList<>(); + for (Types.Field f : record.fields()) { + visitor.beforeField(f); + T result; + try { + result = visit(f.type(), visitor); + } finally { + visitor.afterField(f); + } + results.add(visitor.field(f, result)); + } + return visitor.record(record, results); + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + T elementResult; + Types.Field elementField = array.field(array.elementId()); + visitor.beforeArrayElement(elementField); + try { + elementResult = visit(elementField.type(), visitor); + } finally { + visitor.afterArrayElement(elementField); + } + return visitor.array(array, elementResult); + case MAP: + Types.MapType map = (Types.MapType) type; + T keyResult; + T valueResult; + Types.Field keyField = map.field(map.keyId()); + visitor.beforeMapKey(keyField); + try { + keyResult = visit(map.keyType(), visitor); + } 
finally { + visitor.afterMapKey(keyField); + } + Types.Field valueField = map.field(map.valueId()); + visitor.beforeMapValue(valueField); + try { + valueResult = visit(map.valueType(), visitor); + } finally { + visitor.afterMapValue(valueField); + } + return visitor.map(map, keyResult, valueResult); + default: + return visitor.primitive((Type.PrimitiveType)type); + } + } + + /** + * Build a mapping from id to field for a internal Type. + * + * @param type hoodie internal type + * @return a mapping from id to field + */ + public Map buildIdToField(Type type) { + Map idToField = new HashMap<>(); + visitIdToField(type, idToField); + return idToField; + } + + private void visitIdToField(Type type, Map index) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType) type; + for (Types.Field field : record.fields()) { + visitIdToField(field.type(), index); + index.put(field.fieldId(), field); + } + return; + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + visitIdToField(array.elementType(), index); + for (Types.Field field : array.fields()) { + index.put(field.fieldId(), field); + } + return; + case MAP: + Types.MapType map = (Types.MapType) type; + visitIdToField(map.keyType(), index); + visitIdToField(map.valueType(), index); + for (Types.Field field : map.fields()) { + index.put(field.fieldId(), field); + } + return; + default: + return; + } + } + + /** + * Build a mapping which maintain the relation between child field id and it's parent field id. + * if a child field y(which id is 9) belong to a nest field x(which id is 6), then (9 -> 6) will be added to the result map. + * if a field has no parent field, nothings will be added. + * + * @param record hoodie record type. 
+ * @return a mapping from id to parentId for a record Type + */ + public Map index2Parents(Types.RecordType record) { + Map result = new HashMap<>(); + Deque parentIds = new LinkedList<>(); + index2Parents(record, parentIds, result); + return result; + } + + private void index2Parents(Type type, Deque pids, Map id2p) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType)type; + for (Types.Field f : record.fields()) { + pids.push(f.fieldId()); + index2Parents(f.type(), pids, id2p); + pids.pop(); + } + + for (Types.Field f : record.fields()) { + // root record has no parent id. + if (!pids.isEmpty()) { + Integer pid = pids.peek(); + id2p.put(f.fieldId(), pid); + } + } + return; + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + Types.Field elementField = array.field(array.elementId()); + pids.push(elementField.fieldId()); + index2Parents(elementField.type(), pids, id2p); + pids.pop(); + id2p.put(array.elementId(), pids.peek()); + return; + case MAP: + Types.MapType map = (Types.MapType) type; + Types.Field keyField = map.field(map.keyId()); + Types.Field valueField = map.field(map.valueId()); + // visit key + pids.push(map.keyId()); + index2Parents(keyField.type(), pids, id2p); + pids.pop(); + // visit value + pids.push(map.valueId()); + index2Parents(valueField.type(), pids, id2p); + pids.pop(); + id2p.put(map.keyId(), pids.peek()); + id2p.put(map.valueId(), pids.peek()); + return; + default: + } + } + + /** + * Assigns new ids for all fields in a Type, based on initial id. + * + * @param type a type. 
+ * @param nextId initial id which used to fresh ids for all fields in a type + * @return a new type with new ids + */ + public Type refreshNewId(Type type, AtomicInteger nextId) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType) type; + List oldFields = record.fields(); + int currentId = nextId.get(); + nextId.set(currentId + record.fields().size()); + List internalFields = new ArrayList<>(); + for (int i = 0; i < oldFields.size(); i++) { + Types.Field oldField = oldFields.get(i); + Type fieldType = refreshNewId(oldField.type(), nextId); + internalFields.add(Types.Field.get(currentId++, oldField.isOptional(), oldField.name(), fieldType, oldField.doc())); + } + return Types.RecordType.get(internalFields); + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + int elementId = nextId.get(); + nextId.set(elementId + 1); + Type elementType = refreshNewId(array.elementType(), nextId); + return Types.ArrayType.get(elementId, array.isElementOptional(), elementType); + case MAP: + Types.MapType map = (Types.MapType) type; + int keyId = nextId.get(); + int valueId = keyId + 1; + nextId.set(keyId + 2); + Type keyType = refreshNewId(map.keyType(), nextId); + Type valueType = refreshNewId(map.valueType(), nextId); + return Types.MapType.get(keyId, valueId, keyType, valueType, map.isValueOptional()); + default: + return type; + } + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/Type.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/Type.java new file mode 100644 index 0000000000000..b89cceb21319d --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/Type.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema; + +import java.io.Serializable; +import java.util.List; +import java.util.Locale; + +/** + * The type of a schema, reference avro schema. + * now avro version used by hoodie, not support localTime. + * to do add support for localTime if avro version is updated + */ +public interface Type extends Serializable { + enum TypeID { + RECORD, ARRAY, MAP, FIXED, STRING, BINARY, + INT, LONG, FLOAT, DOUBLE, DATE, BOOLEAN, TIME, TIMESTAMP, DECIMAL, UUID; + private String name; + TypeID() { + this.name = this.name().toLowerCase(Locale.ROOT); + } + + public String getName() { + return name; + } + } + + static TypeID fromValue(String value) { + try { + return TypeID.valueOf(value.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException(String.format("Invalid value of Type: %s", value)); + } + } + + TypeID typeId(); + + default boolean isNestedType() { + return false; + } + + abstract class PrimitiveType implements Type { + @Override + public boolean isNestedType() { + return false; + } + } + + abstract class NestedType implements Type { + + @Override + public boolean isNestedType() { + return true; + } + + public abstract List fields(); + + public abstract Type fieldType(String name); + + public abstract Types.Field field(int id); + } +} diff --git 
a/hudi-common/src/main/java/org/apache/hudi/internal/schema/Types.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/Types.java new file mode 100644 index 0000000000000..fff10a700f618 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/Types.java @@ -0,0 +1,716 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.internal.schema; + +import org.apache.hudi.internal.schema.Type.PrimitiveType; +import org.apache.hudi.internal.schema.Type.NestedType; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +public class Types { + private Types() { + } + + public static class BooleanType extends PrimitiveType { + private static final BooleanType INSTANCE = new BooleanType(); + + public static BooleanType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return Type.TypeID.BOOLEAN; + } + + @Override + public String toString() { + return "boolean"; + } + } + + public static class IntType extends PrimitiveType { + private static final IntType INSTANCE = new IntType(); + + public static IntType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return TypeID.INT; + } + + @Override + public String toString() { + return "int"; + } + } + + public static class LongType extends PrimitiveType { + private static final LongType INSTANCE = new LongType(); + + public static LongType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return TypeID.LONG; + } + + @Override + public String toString() { + return "long"; + } + } + + public static class FloatType extends PrimitiveType { + private static final FloatType INSTANCE = new FloatType(); + + public static FloatType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return TypeID.FLOAT; + } + + @Override + public String toString() { + return "float"; + } + } + + public static class DoubleType extends PrimitiveType { + private static final DoubleType INSTANCE = new DoubleType(); + + public static DoubleType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return TypeID.DOUBLE; + } + + @Override + public String toString() { + return 
"double"; + } + } + + public static class DateType extends PrimitiveType { + private static final DateType INSTANCE = new DateType(); + + public static DateType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return TypeID.DATE; + } + + @Override + public String toString() { + return "date"; + } + } + + public static class TimeType extends PrimitiveType { + private static final TimeType INSTANCE = new TimeType(); + + public static TimeType get() { + return INSTANCE; + } + + private TimeType() { + } + + @Override + public TypeID typeId() { + return TypeID.TIME; + } + + @Override + public String toString() { + return "time"; + } + } + + public static class TimestampType extends PrimitiveType { + private static final TimestampType INSTANCE = new TimestampType(); + + public static TimestampType get() { + return INSTANCE; + } + + private TimestampType() { + } + + @Override + public TypeID typeId() { + return TypeID.TIMESTAMP; + } + + @Override + public String toString() { + return "timestamp"; + } + } + + public static class StringType extends PrimitiveType { + private static final StringType INSTANCE = new StringType(); + + public static StringType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return TypeID.STRING; + } + + @Override + public String toString() { + return "string"; + } + } + + public static class BinaryType extends PrimitiveType { + private static final BinaryType INSTANCE = new BinaryType(); + + public static BinaryType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return TypeID.BINARY; + } + + @Override + public String toString() { + return "binary"; + } + } + + public static class FixedType extends PrimitiveType { + public static FixedType getFixed(int size) { + return new FixedType(size); + } + + private final int size; + + private FixedType(int length) { + this.size = length; + } + + public int getFixedSize() { + return size; + } + + @Override + public TypeID typeId() 
{ + return TypeID.FIXED; + } + + @Override + public String toString() { + return String.format("fixed[%d]", size); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof FixedType)) { + return false; + } + + FixedType fixedType = (FixedType) o; + return size == fixedType.size; + } + + @Override + public int hashCode() { + return Objects.hash(FixedType.class, size); + } + } + + public static class DecimalType extends PrimitiveType { + public static DecimalType get(int precision, int scale) { + return new DecimalType(precision, scale); + } + + private final int scale; + private final int precision; + + private DecimalType(int precision, int scale) { + this.scale = scale; + this.precision = precision; + } + + /** + * Returns whether this DecimalType is wider than `other`. If yes, it means `other` + * can be casted into `this` safely without losing any precision or range. + */ + public boolean isWiderThan(PrimitiveType other) { + if (other instanceof DecimalType) { + DecimalType dt = (DecimalType) other; + return (precision - scale) >= (dt.precision - dt.scale) && scale > dt.scale; + } + if (other instanceof IntType) { + return isWiderThan(get(10, 0)); + } + return false; + } + + /** + * Returns whether this DecimalType is tighter than `other`. If yes, it means `this` + * can be casted into `other` safely without losing any precision or range. 
+ */ + public boolean isTighterThan(PrimitiveType other) { + if (other instanceof DecimalType) { + DecimalType dt = (DecimalType) other; + return (precision - scale) <= (dt.precision - dt.scale) && scale <= dt.scale; + } + if (other instanceof IntType) { + return isTighterThan(get(10, 0)); + } + return false; + } + + public int scale() { + return scale; + } + + public int precision() { + return precision; + } + + @Override + public TypeID typeId() { + return TypeID.DECIMAL; + } + + @Override + public String toString() { + return String.format("decimal(%d, %d)", precision, scale); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof DecimalType)) { + return false; + } + + DecimalType that = (DecimalType) o; + if (scale != that.scale) { + return false; + } + return precision == that.precision; + } + + @Override + public int hashCode() { + return Objects.hash(DecimalType.class, scale, precision); + } + } + + public static class UUIDType extends PrimitiveType { + private static final UUIDType INSTANCE = new UUIDType(); + + public static UUIDType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return TypeID.UUID; + } + + @Override + public String toString() { + return "uuid"; + } + } + + /** A field within a record. 
*/ + public static class Field implements Serializable { + // Experimental method to support defaultValue + public static Field get(int id, boolean isOptional, String name, Type type, String doc, Object defaultValue) { + return new Field(isOptional, id, name, type, doc, defaultValue); + } + + public static Field get(int id, boolean isOptional, String name, Type type, String doc) { + return new Field(isOptional, id, name, type, doc, null); + } + + public static Field get(int id, boolean isOptional, String name, Type type) { + return new Field(isOptional, id, name, type, null, null); + } + + public static Field get(int id, String name, Type type) { + return new Field(true, id, name, type, null, null); + } + + private final boolean isOptional; + private final int id; + private final String name; + private final Type type; + private final String doc; + // Experimental properties + private final Object defaultValue; + + private Field(boolean isOptional, int id, String name, Type type, String doc, Object defaultValue) { + this.isOptional = isOptional; + this.id = id; + this.name = name; + this.type = type; + this.doc = doc; + this.defaultValue = defaultValue; + } + + public Object getDefaultValue() { + return defaultValue; + } + + public boolean isOptional() { + return isOptional; + } + + public int fieldId() { + return id; + } + + public String name() { + return name; + } + + public Type type() { + return type; + } + + public String doc() { + return doc; + } + + @Override + public String toString() { + return String.format("%d: %s: %s %s", + id, name, isOptional ? "optional" : "required", type) + (doc != null ? 
" (" + doc + ")" : ""); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof Field)) { + return false; + } + + Field that = (Field) o; + if (isOptional != that.isOptional) { + return false; + } else if (id != that.id) { + return false; + } else if (!name.equals(that.name)) { + return false; + } else if (!Objects.equals(doc, that.doc)) { + return false; + } + return type.equals(that.type); + } + + @Override + public int hashCode() { + return Objects.hash(Field.class, id, isOptional, name, type); + } + } + + public static class RecordType extends NestedType { + + public static RecordType get(List fields) { + return new RecordType(fields); + } + + public static RecordType get(Field... fields) { + return new RecordType(Arrays.asList(fields)); + } + + private final Field[] fields; + + private transient Map nameToFields = null; + private transient Map idToFields = null; + + private RecordType(List fields) { + this.fields = new Field[fields.size()]; + for (int i = 0; i < this.fields.length; i += 1) { + this.fields[i] = fields.get(i); + } + } + + @Override + public List fields() { + return Arrays.asList(fields); + } + + public Field field(String name) { + if (nameToFields == null) { + nameToFields = new HashMap<>(); + for (Field field : fields) { + nameToFields.put(field.name().toLowerCase(Locale.ROOT), field); + } + } + return nameToFields.get(name.toLowerCase(Locale.ROOT)); + } + + @Override + public Field field(int id) { + if (idToFields == null) { + idToFields = new HashMap<>(); + for (Field field : fields) { + idToFields.put(field.fieldId(), field); + } + } + return idToFields.get(id); + } + + @Override + public Type fieldType(String name) { + Field field = field(name); + if (field != null) { + return field.type(); + } + return null; + } + + @Override + public TypeID typeId() { + return TypeID.RECORD; + } + + @Override + public String toString() { + return String.format("Record<%s>", 
Arrays.stream(fields).map(f -> f.toString()).collect(Collectors.joining("-"))); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof RecordType)) { + return false; + } + + RecordType that = (RecordType) o; + return Arrays.equals(fields, that.fields); + } + + @Override + public int hashCode() { + return Objects.hash(Field.class, Arrays.hashCode(fields)); + } + } + + public static class ArrayType extends NestedType { + public static ArrayType get(int elementId, boolean isOptional, Type elementType) { + return new ArrayType(Field.get(elementId, isOptional,"element", elementType)); + } + + private final Field elementField; + + private ArrayType(Field elementField) { + this.elementField = elementField; + } + + public Type elementType() { + return elementField.type(); + } + + @Override + public Type fieldType(String name) { + if ("element".equals(name)) { + return elementType(); + } + return null; + } + + @Override + public Field field(int id) { + if (elementField.fieldId() == id) { + return elementField; + } + return null; + } + + @Override + public List fields() { + return Arrays.asList(elementField); + } + + public int elementId() { + return elementField.fieldId(); + } + + public boolean isElementOptional() { + return elementField.isOptional; + } + + @Override + public TypeID typeId() { + return TypeID.ARRAY; + } + + @Override + public String toString() { + return String.format("list<%s>", elementField.type()); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof ArrayType)) { + return false; + } + ArrayType listType = (ArrayType) o; + return elementField.equals(listType.elementField); + } + + @Override + public int hashCode() { + return Objects.hash(ArrayType.class, elementField); + } + } + + public static class MapType extends NestedType { + + public static MapType get(int keyId, int valueId, Type keyType, Type valueType) { + return new 
MapType( + Field.get(keyId, "key", keyType), + Field.get(valueId, "value", valueType)); + } + + public static MapType get(int keyId, int valueId, Type keyType, Type valueType, boolean isOptional) { + return new MapType( + Field.get(keyId, isOptional, "key", keyType), + Field.get(valueId, isOptional, "value", valueType)); + } + + private final Field keyField; + private final Field valueField; + private transient List fields = null; + + private MapType(Field keyField, Field valueField) { + this.keyField = keyField; + this.valueField = valueField; + } + + public Type keyType() { + return keyField.type(); + } + + public Type valueType() { + return valueField.type(); + } + + @Override + public Type fieldType(String name) { + if ("key".equals(name)) { + return keyField.type(); + } else if ("value".equals(name)) { + return valueField.type(); + } + return null; + } + + @Override + public Field field(int id) { + if (keyField.fieldId() == id) { + return keyField; + } else if (valueField.fieldId() == id) { + return valueField; + } + return null; + } + + @Override + public List fields() { + if (fields == null) { + fields = Arrays.asList(keyField, valueField); + } + return fields; + } + + public int keyId() { + return keyField.fieldId(); + } + + public int valueId() { + return valueField.fieldId(); + } + + public boolean isValueOptional() { + return valueField.isOptional; + } + + @Override + public TypeID typeId() { + return TypeID.MAP; + } + + @Override + public String toString() { + return String.format("map<%s, %s>", keyField.type(), valueField.type()); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof MapType)) { + return false; + } + + MapType mapType = (MapType) o; + if (!keyField.equals(mapType.keyField)) { + return false; + } + return valueField.equals(mapType.valueField); + } + + @Override + public int hashCode() { + return Objects.hash(MapType.class, keyField, valueField); + } + } +} diff --git 
a/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/InternalSchemaChangeApplier.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/InternalSchemaChangeApplier.java new file mode 100644 index 0000000000000..c84d2fa23972a --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/InternalSchemaChangeApplier.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.action; + +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Type; +import org.apache.hudi.internal.schema.utils.SchemaChangeUtils; + +import java.util.Arrays; + +/** + * Manage schema change for HoodieWriteClient. + */ +public class InternalSchemaChangeApplier { + private InternalSchema latestSchema; + + public InternalSchemaChangeApplier(InternalSchema latestSchema) { + this.latestSchema = latestSchema; + } + + /** + * Add columns to table. + * + * @param colName col name to be added. if we want to add col to a nested filed, the fullName should be specify + * @param colType col type to be added. + * @param doc col doc to be added. 
+ * @param position col position to be added + * @param positionType col position change type. now support three change types: first/after/before + */ + public InternalSchema applyAddChange( + String colName, + Type colType, + String doc, + String position, + TableChange.ColumnPositionChange.ColumnPositionType positionType) { + TableChanges.ColumnAddChange add = TableChanges.ColumnAddChange.get(latestSchema); + String parentName = TableChangesHelper.getParentName(colName); + add.addColumns(parentName, colName, colType, doc); + if (positionType != null) { + switch (positionType) { + case NO_OPERATION: + break; + case FIRST: + add.addPositionChange(colName, "", positionType); + break; + case AFTER: + case BEFORE: + if (position == null || position.isEmpty()) { + throw new IllegalArgumentException("position should not be null/empty_string when specify positionChangeType as after/before"); + } + String referParentName = TableChangesHelper.getParentName(position); + if (!parentName.equals(referParentName)) { + throw new IllegalArgumentException("cannot reorder two columns which has different parent"); + } + add.addPositionChange(colName, position, positionType); + break; + default: + throw new IllegalArgumentException(String.format("only support first/before/after but found: %s", positionType)); + } + } else { + throw new IllegalArgumentException(String.format("positionType should be specified")); + } + return SchemaChangeUtils.applyTableChanges2Schema(latestSchema, add); + } + + /** + * Delete columns to table. + * + * @param colNames col name to be deleted. if we want to delete col from a nested filed, the fullName should be specify + */ + public InternalSchema applyDeleteChange(String... 
colNames) { + TableChanges.ColumnDeleteChange delete = TableChanges.ColumnDeleteChange.get(latestSchema); + Arrays.stream(colNames).forEach(colName -> delete.deleteColumn(colName)); + return SchemaChangeUtils.applyTableChanges2Schema(latestSchema, delete); + } + + /** + * Rename col name for hudi table. + * + * @param colName col name to be renamed. if we want to rename col from a nested filed, the fullName should be specify + * @param newName new name for current col. no need to specify fullName. + */ + public InternalSchema applyRenameChange(String colName, String newName) { + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(latestSchema); + updateChange.renameColumn(colName, newName); + return SchemaChangeUtils.applyTableChanges2Schema(latestSchema, updateChange); + } + + /** + * Update col nullability for hudi table. + * + * @param colName col name to be changed. if we want to change col from a nested filed, the fullName should be specify + * @param nullable . + */ + public InternalSchema applyColumnNullabilityChange(String colName, boolean nullable) { + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(latestSchema); + updateChange.updateColumnNullability(colName, nullable); + return SchemaChangeUtils.applyTableChanges2Schema(latestSchema, updateChange); + } + + /** + * Update col type for hudi table. + * + * @param colName col name to be changed. if we want to change col from a nested filed, the fullName should be specify + * @param newType . + */ + public InternalSchema applyColumnTypeChange(String colName, Type newType) { + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(latestSchema); + updateChange.updateColumnType(colName, newType); + return SchemaChangeUtils.applyTableChanges2Schema(latestSchema, updateChange); + } + + /** + * Update col comment for hudi table. + * + * @param colName col name to be changed. 
if we want to change col from a nested filed, the fullName should be specify + * @param doc . + */ + public InternalSchema applyColumnCommentChange(String colName, String doc) { + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(latestSchema); + updateChange.updateColumnComment(colName, doc); + return SchemaChangeUtils.applyTableChanges2Schema(latestSchema, updateChange); + } + + /** + * Reorder the position of col. + * + * @param colName column which need to be reordered. if we want to change col from a nested filed, the fullName should be specify. + * @param referColName reference position. + * @param positionType col position change type. now support three change types: first/after/before + */ + public InternalSchema applyReOrderColPositionChange( + String colName, + String referColName, + TableChange.ColumnPositionChange.ColumnPositionType positionType) { + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(latestSchema); + String parentName = TableChangesHelper.getParentName(colName); + String referParentName = TableChangesHelper.getParentName(referColName); + if (positionType.equals(TableChange.ColumnPositionChange.ColumnPositionType.FIRST)) { + updateChange.addPositionChange(colName, "", positionType); + } else if (parentName.equals(referParentName)) { + updateChange.addPositionChange(colName, referColName, positionType); + } else { + throw new IllegalArgumentException("cannot reorder two columns which has different parent"); + } + return SchemaChangeUtils.applyTableChanges2Schema(latestSchema, updateChange); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/InternalSchemaMerger.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/InternalSchemaMerger.java new file mode 100644 index 0000000000000..0d93ab170b374 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/InternalSchemaMerger.java @@ -0,0 +1,197 @@ 
+/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.action; + +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Type; +import org.apache.hudi.internal.schema.Types; + +import java.util.ArrayList; +import java.util.List; + +/** + * Auxiliary class. + * help to merge file schema and query schema to produce final read schema for avro/parquet file + */ +public class InternalSchemaMerger { + private final InternalSchema fileSchema; + private final InternalSchema querySchema; + // now there exist some bugs when we use spark update/merge api, + // those operation will change col nullability from optional to required which is wrong. + // Before that bug is fixed, we need to do adapt. + // if mergeRequiredFiledForce is true, we will ignore the col's required attribute. + private final boolean ignoreRequiredAttribute; + // Whether to use column Type from file schema to read files when we find some column type has changed. + // spark parquetReader need the original column type to read data, otherwise the parquetReader will failed. 
+ // eg: current column type is StringType, now we changed it to decimalType, + // we should not pass decimalType to parquetReader, we must pass StringType to it; when we read out the data, we convert data from String to Decimal, everything is ok. + // for log reader + // since our reWriteRecordWithNewSchema function support rewrite directly, so we no need this parameter + // eg: current column type is StringType, now we changed it to decimalType, + // we can pass decimalType to reWriteRecordWithNewSchema directly, everything is ok. + private boolean useColumnTypeFromFileSchema = true; + + public InternalSchemaMerger(InternalSchema fileSchema, InternalSchema querySchema, boolean ignoreRequiredAttribute, boolean useColumnTypeFromFileSchema) { + this.fileSchema = fileSchema; + this.querySchema = querySchema; + this.ignoreRequiredAttribute = ignoreRequiredAttribute; + this.useColumnTypeFromFileSchema = useColumnTypeFromFileSchema; + } + + /** + * Create final read schema to read avro/parquet file. + * + * @return read schema to read avro/parquet file. + */ + public InternalSchema mergeSchema() { + Types.RecordType record = (Types.RecordType) mergeType(querySchema.getRecord(), 0); + return new InternalSchema(record.fields()); + } + + /** + * Create final read schema to read avro/parquet file. + * this is auxiliary function used by mergeSchema. 
+ */ + private Type mergeType(Type type, int currentTypeId) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType) type; + List newTypes = new ArrayList<>(); + for (Types.Field f : record.fields()) { + Type newType = mergeType(f.type(), f.fieldId()); + newTypes.add(newType); + } + return Types.RecordType.get(buildRecordType(record.fields(), newTypes)); + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + Type newElementType; + Types.Field elementField = array.fields().get(0); + newElementType = mergeType(elementField.type(), elementField.fieldId()); + return buildArrayType(array, newElementType); + case MAP: + Types.MapType map = (Types.MapType) type; + Type newValueType = mergeType(map.valueType(), map.valueId()); + return buildMapType(map, newValueType); + default: + return buildPrimitiveType((Type.PrimitiveType) type, currentTypeId); + } + } + + private List buildRecordType(List oldFields, List newTypes) { + List newFields = new ArrayList<>(); + for (int i = 0; i < newTypes.size(); i++) { + Type newType = newTypes.get(i); + Types.Field oldField = oldFields.get(i); + int fieldId = oldField.fieldId(); + String fullName = querySchema.findfullName(fieldId); + if (fileSchema.findField(fieldId) != null) { + if (fileSchema.findfullName(fieldId).equals(fullName)) { + // maybe col type changed, deal with it. + newFields.add(Types.Field.get(oldField.fieldId(), oldField.isOptional(), oldField.name(), newType, oldField.doc())); + } else { + // find rename, deal with it. 
+ newFields.add(dealWithRename(fieldId, newType, oldField)); + } + } else { + // buildFullName + fullName = normalizeFullName(fullName); + if (fileSchema.findField(fullName) != null) { + newFields.add(Types.Field.get(oldField.fieldId(), oldField.isOptional(), oldField.name() + "suffix", oldField.type(), oldField.doc())); + } else { + // find add column + // now there exist some bugs when we use spark update/merge api, those operation will change col optional to required. + if (ignoreRequiredAttribute) { + newFields.add(Types.Field.get(oldField.fieldId(), true, oldField.name(), newType, oldField.doc())); + } else { + newFields.add(Types.Field.get(oldField.fieldId(), oldField.isOptional(), oldField.name(), newType, oldField.doc())); + } + } + } + } + return newFields; + } + + private Types.Field dealWithRename(int fieldId, Type newType, Types.Field oldField) { + Types.Field fieldFromFileSchema = fileSchema.findField(fieldId); + String nameFromFileSchema = fieldFromFileSchema.name(); + Type typeFromFileSchema = fieldFromFileSchema.type(); + // Current design mechanism guarantees nestedType change is not allowed, so no need to consider. + if (newType.isNestedType()) { + return Types.Field.get(oldField.fieldId(), oldField.isOptional(), nameFromFileSchema, newType, oldField.doc()); + } else { + return Types.Field.get(oldField.fieldId(), oldField.isOptional(), nameFromFileSchema, useColumnTypeFromFileSchema ? typeFromFileSchema : newType, oldField.doc()); + } + } + + private String normalizeFullName(String fullName) { + // find parent rename, and normalize fullName + // eg: we renamed a nest field struct(c, d) to aa, the we delete a.d and add it back later. 
+ String[] nameParts = fullName.split("\\."); + String[] normalizedNameParts = new String[nameParts.length]; + System.arraycopy(nameParts, 0, normalizedNameParts, 0, nameParts.length); + for (int j = 0; j < nameParts.length - 1; j++) { + StringBuilder sb = new StringBuilder(); + for (int k = 0; k <= j; k++) { + sb.append(nameParts[k]); + } + String parentName = sb.toString(); + int parentFieldIdFromQuerySchema = querySchema.findIdByName(parentName); + String parentNameFromFileSchema = fileSchema.findfullName(parentFieldIdFromQuerySchema); + if (parentNameFromFileSchema.isEmpty()) { + break; + } + if (!parentNameFromFileSchema.equalsIgnoreCase(parentName)) { + // find parent rename, update nameParts + String[] parentNameParts = parentNameFromFileSchema.split("\\."); + System.arraycopy(parentNameParts, 0, normalizedNameParts, 0, parentNameParts.length); + } + } + return StringUtils.join(normalizedNameParts, "."); + } + + private Type buildArrayType(Types.ArrayType array, Type newType) { + Types.Field elementField = array.fields().get(0); + int elementId = elementField.fieldId(); + if (elementField.type() == newType) { + return array; + } else { + return Types.ArrayType.get(elementId, elementField.isOptional(), newType); + } + } + + private Type buildMapType(Types.MapType map, Type newValue) { + Types.Field valueFiled = map.fields().get(1); + if (valueFiled.type() == newValue) { + return map; + } else { + return Types.MapType.get(map.keyId(), map.valueId(), map.keyType(), newValue, map.isValueOptional()); + } + } + + private Type buildPrimitiveType(Type.PrimitiveType typeFromQuerySchema, int currentPrimitiveTypeId) { + Type typeFromFileSchema = fileSchema.findType(currentPrimitiveTypeId); + if (typeFromFileSchema == null) { + return typeFromQuerySchema; + } else { + return useColumnTypeFromFileSchema ? 
typeFromFileSchema : typeFromQuerySchema; + } + } +} + diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/TableChange.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/TableChange.java new file mode 100644 index 0000000000000..7594f94732a90 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/TableChange.java @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.action; + +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.internal.schema.HoodieSchemaException; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.InternalSchemaBuilder; +import org.apache.hudi.internal.schema.Types; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +/** + * TableChange subclasses represent requested changes to a table. + * now only column changes support. + * to do support partition changes + */ +public interface TableChange { + /* The action Type of schema change. 
*/ + enum ColumnChangeID { + ADD, UPDATE, DELETE, PROPERTY_CHANGE, REPLACE; + private String name; + + private ColumnChangeID() { + this.name = this.name().toLowerCase(Locale.ROOT); + } + + public String getName() { + return name; + } + } + + static ColumnChangeID fromValue(String value) { + switch (value.toLowerCase(Locale.ROOT)) { + case "add": + return ColumnChangeID.ADD; + case "change": + return ColumnChangeID.UPDATE; + case "delete": + return ColumnChangeID.DELETE; + case "property": + return ColumnChangeID.PROPERTY_CHANGE; + case "replace": + return ColumnChangeID.REPLACE; + default: + throw new IllegalArgumentException("Invalid value of Type."); + } + } + + ColumnChangeID columnChangeId(); + + default boolean withPositionChange() { + return false; + } + + abstract class BaseColumnChange implements TableChange { + protected final InternalSchema internalSchema; + protected final Map id2parent; + protected final Map> positionChangeMap = new HashMap<>(); + + BaseColumnChange(InternalSchema schema) { + this.internalSchema = schema; + this.id2parent = InternalSchemaBuilder.getBuilder().index2Parents(schema.getRecord()); + } + + /** + * Add position change. + * + * @param srcName column which need to be reordered + * @param dsrName reference position + * @param orderType change types + * @return this + */ + public BaseColumnChange addPositionChange(String srcName, String dsrName, ColumnPositionChange.ColumnPositionType orderType) { + Integer srcId = findIdByFullName(srcName); + Option dsrIdOpt = dsrName.isEmpty() ? Option.empty() : Option.of(findIdByFullName(dsrName)); + Integer srcParentId = id2parent.get(srcId); + Option dsrParentIdOpt = dsrIdOpt.map(id2parent::get); + // forbid adjust hoodie metadata columns. 
+ switch (orderType) { + case BEFORE: + checkColModifyIsLegal(dsrName); + break; + case FIRST: + if (srcId == null || srcId == -1 || srcParentId == null || srcParentId == -1) { + throw new HoodieSchemaException("forbid adjust top-level columns position by using through first syntax"); + } + break; + case AFTER: + List checkColumns = HoodieRecord.HOODIE_META_COLUMNS.subList(0, HoodieRecord.HOODIE_META_COLUMNS.size() - 2); + if (checkColumns.stream().anyMatch(f -> f.equalsIgnoreCase(dsrName))) { + throw new HoodieSchemaException("forbid adjust the position of ordinary columns between meta columns"); + } + break; + case NO_OPERATION: + default: + break; + } + int parentId; + if (srcParentId != null && dsrParentIdOpt.isPresent() && srcParentId.equals(dsrParentIdOpt.get())) { + Types.Field parentField = internalSchema.findField(srcParentId); + if (!(parentField.type() instanceof Types.RecordType)) { + throw new HoodieSchemaException(String.format("only support reorder fields in struct type, but find: %s", parentField.type())); + } + parentId = parentField.fieldId(); + } else if (srcParentId == null && !dsrParentIdOpt.isPresent()) { + parentId = -1; + } else if (srcParentId != null && !dsrParentIdOpt.isPresent() && orderType.equals(ColumnPositionChange.ColumnPositionType.FIRST)) { + parentId = srcParentId; + } else { + throw new HoodieSchemaException("cannot order position from different parent"); + } + + ArrayList changes = positionChangeMap.getOrDefault(parentId, new ArrayList<>()); + changes.add(ColumnPositionChange.get(srcId, dsrIdOpt.orElse(-1), orderType)); + positionChangeMap.put(parentId, changes); + return this; + } + + public BaseColumnChange addPositionChange(String srcName, String dsrName, String orderType) { + return addPositionChange(srcName, dsrName, ColumnPositionChange.fromTypeValue(orderType)); + } + + /** + * Abstract method. 
+ * give a column fullName and return the field id + * + * @param fullName column fullName + * @return field id of current column + */ + protected abstract Integer findIdByFullName(String fullName); + + // Modify hudi meta columns is prohibited + protected void checkColModifyIsLegal(String colNeedToModfiy) { + if (HoodieRecord.HOODIE_META_COLUMNS.stream().anyMatch(f -> f.equalsIgnoreCase(colNeedToModfiy))) { + throw new IllegalArgumentException(String.format("cannot modify hudi meta col: %s", colNeedToModfiy)); + } + } + + @Override + public boolean withPositionChange() { + return false; + } + } + + /** + * Column position change. + * now support three change types: FIRST/AFTER/BEFORE + * FIRST means the specified column should be the first column. + * AFTER means the specified column should be put after the given column. + * BEFORE means the specified column should be put before the given column. + * Note that, the specified column may be a nested field: + * AFTER/BEFORE means the given columns should in the same struct; + * FIRST means this field should be the first one within the struct. + */ + class ColumnPositionChange { + public enum ColumnPositionType { + FIRST, + BEFORE, + AFTER, + // only expose to internal use. 
+ NO_OPERATION + } + + static ColumnPositionType fromTypeValue(String value) { + switch (value.toLowerCase(Locale.ROOT)) { + case "first": + return ColumnPositionType.FIRST; + case "before": + return ColumnPositionType.BEFORE; + case "after": + return ColumnPositionType.AFTER; + case "no_operation": + return ColumnPositionType.NO_OPERATION; + default: + throw new IllegalArgumentException(String.format("only support first/before/after but found: %s", value)); + } + } + + private final int srcId; + private final int dsrId; + private final ColumnPositionType type; + + static ColumnPositionChange first(int srcId) { + return new ColumnPositionChange(srcId, -1, ColumnPositionType.FIRST); + } + + static ColumnPositionChange before(int srcId, int dsrId) { + return new ColumnPositionChange(srcId, dsrId, ColumnPositionType.BEFORE); + } + + static ColumnPositionChange after(int srcId, int dsrId) { + return new ColumnPositionChange(srcId, dsrId, ColumnPositionType.AFTER); + } + + static ColumnPositionChange get(int srcId, int dsrId, String type) { + return get(srcId, dsrId, fromTypeValue(type)); + } + + static ColumnPositionChange get(int srcId, int dsrId, ColumnPositionType type) { + switch (type) { + case FIRST: + return ColumnPositionChange.first(srcId); + case BEFORE: + return ColumnPositionChange.before(srcId, dsrId); + case AFTER: + return ColumnPositionChange.after(srcId, dsrId); + default: + throw new IllegalArgumentException(String.format("only support first/before/after but found: %s", type)); + } + } + + private ColumnPositionChange(int srcId, int dsrId, ColumnPositionType type) { + this.srcId = srcId; + this.dsrId = dsrId; + this.type = type; + } + + public int getSrcId() { + return srcId; + } + + public int getDsrId() { + return dsrId; + } + + public ColumnPositionType type() { + return type; + } + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/action/TableChanges.java 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.internal.schema.action;

import org.apache.hudi.internal.schema.HoodieSchemaException;
import org.apache.hudi.internal.schema.InternalSchema;
import org.apache.hudi.internal.schema.InternalSchemaBuilder;
import org.apache.hudi.internal.schema.Type;
import org.apache.hudi.internal.schema.Types;
import org.apache.hudi.internal.schema.utils.SchemaChangeUtils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Concrete {@link TableChange} implementations: update, delete and add column changes.
 */
public class TableChanges {

  /** Deal with update columns changes for table. */
  public static class ColumnUpdateChange extends TableChange.BaseColumnChange {
    // fieldId -> the replacement field carrying the updated type/name/doc/nullability
    private final Map<Integer, Types.Field> updates = new HashMap<>();

    public static ColumnUpdateChange get(InternalSchema schema) {
      return new ColumnUpdateChange(schema);
    }

    private ColumnUpdateChange(InternalSchema schema) {
      super(schema);
    }

    @Override
    public boolean withPositionChange() {
      return true;
    }

    /**
     * Applies the collected updates (type change and/or child re-ordering) to one field.
     *
     * @param oldField the original field
     * @param type the (possibly already rewritten) type of that field
     * @return the updated type for the field
     */
    public Type applyUpdates(Types.Field oldField, Type type) {
      Types.Field update = updates.get(oldField.fieldId());
      if (update != null && update.type() != oldField.type()) {
        return update.type();
      }
      ArrayList<TableChange.ColumnPositionChange> pchanges = positionChangeMap.getOrDefault(oldField.fieldId(), new ArrayList<>());
      if (!pchanges.isEmpty()) {
        // position changes were validated at registration time, so the cast to RecordType is safe
        List<Types.Field> newFields = TableChangesHelper.applyAddChange2Fields(((Types.RecordType) type).fields(), new ArrayList<>(), pchanges);
        return Types.RecordType.get(newFields);
      }
      return type;
    }

    public Map<Integer, Types.Field> getUpdates() {
      return updates;
    }

    /**
     * Update a column in the schema to a new type.
     * only support update primitive type.
     * Only updates that widen types are allowed.
     *
     * @param name name of the column to update
     * @param newType new type for the column
     * @return this
     * @throws IllegalArgumentException if the column is missing, nested, or the type change narrows
     */
    public ColumnUpdateChange updateColumnType(String name, Type newType) {
      checkColModifyIsLegal(name);
      if (newType.isNestedType()) {
        throw new IllegalArgumentException(String.format("only support update primitive type but find nest column: %s", name));
      }
      Types.Field field = internalSchema.findField(name);
      if (field == null) {
        throw new IllegalArgumentException(String.format("cannot update a missing column: %s", name));
      }
      if (!SchemaChangeUtils.isTypeUpdateAllow(field.type(), newType)) {
        throw new IllegalArgumentException(String.format("cannot update origin type: %s to a incompatibility type: %s", field.type(), newType));
      }
      if (field.type().equals(newType)) {
        // nothing to do
        return this;
      }
      // merge with any previously recorded update for this field
      Types.Field update = updates.get(field.fieldId());
      if (update == null) {
        updates.put(field.fieldId(), Types.Field.get(field.fieldId(), field.isOptional(), field.name(), newType, field.doc()));
      } else {
        updates.put(field.fieldId(), Types.Field.get(field.fieldId(), update.isOptional(), update.name(), newType, update.doc()));
      }
      return this;
    }

    /**
     * Update a column doc in the schema.
     *
     * @param name name of the column to update
     * @param newDoc new documentation for the column
     * @return this
     * @throws IllegalArgumentException if the column is missing
     */
    public ColumnUpdateChange updateColumnComment(String name, String newDoc) {
      checkColModifyIsLegal(name);
      Types.Field field = internalSchema.findField(name);
      if (field == null) {
        throw new IllegalArgumentException(String.format("cannot update a missing column: %s", name));
      }
      // Objects.equals handles a null doc on either side
      if (Objects.equals(field.doc(), newDoc)) {
        // nothing to do
        return this;
      }
      Types.Field update = updates.get(field.fieldId());
      if (update == null) {
        updates.put(field.fieldId(), Types.Field.get(field.fieldId(), field.isOptional(), field.name(), field.type(), newDoc));
      } else {
        updates.put(field.fieldId(), Types.Field.get(field.fieldId(), update.isOptional(), update.name(), update.type(), newDoc));
      }
      return this;
    }

    /**
     * Rename a column in the schema.
     *
     * @param name name of the column to rename
     * @param newName new name for the column
     * @return this
     * @throws IllegalArgumentException if the column is missing, the new name is empty, or it collides
     */
    public ColumnUpdateChange renameColumn(String name, String newName) {
      checkColModifyIsLegal(name);
      Types.Field field = internalSchema.findField(name);
      if (field == null) {
        throw new IllegalArgumentException(String.format("cannot update a missing column: %s", name));
      }
      if (newName == null || newName.isEmpty()) {
        throw new IllegalArgumentException(String.format("cannot rename column: %s to empty", name));
      }
      // keep consistent with hive: column names are case-insensitive, so check 'newName.toLowerCase(Locale.ROOT)'
      if (internalSchema.findDuplicateCol(newName.toLowerCase(Locale.ROOT))) {
        throw new IllegalArgumentException(String.format("cannot rename column: %s to a existing name", name));
      }
      Types.Field update = updates.get(field.fieldId());
      if (update == null) {
        updates.put(field.fieldId(), Types.Field.get(field.fieldId(), field.isOptional(), newName, field.type(), field.doc()));
      } else {
        updates.put(field.fieldId(), Types.Field.get(field.fieldId(), update.isOptional(), newName, update.type(), update.doc()));
      }
      return this;
    }

    /**
     * Update nullable for column.
     * only support required type -> optional type
     *
     * @param name name of the column to update
     * @param nullable nullable for updated name
     * @return this
     * @throws IllegalArgumentException
     */
    public ColumnUpdateChange updateColumnNullability(String name, boolean nullable) {
      return updateColumnNullability(name, nullable, false);
    }

    /**
     * Update nullable for column, optionally forcing optional -> required.
     *
     * @param name name of the column to update
     * @param nullable nullable for updated name
     * @param force allow the (unsafe) optional -> required direction
     * @return this
     * @throws IllegalArgumentException
     */
    public ColumnUpdateChange updateColumnNullability(String name, boolean nullable, boolean force) {
      checkColModifyIsLegal(name);
      Types.Field field = internalSchema.findField(name);
      if (field == null) {
        throw new IllegalArgumentException(String.format("cannot update a missing column: %s", name));
      }
      if (field.isOptional() == nullable) {
        // nothing to do
        return this;
      }
      if (field.isOptional() && !nullable && !force) {
        throw new IllegalArgumentException("cannot update column Nullability: optional to required");
      }
      Types.Field update = updates.get(field.fieldId());
      if (update == null) {
        updates.put(field.fieldId(), Types.Field.get(field.fieldId(), nullable, field.name(), field.type(), field.doc()));
      } else {
        updates.put(field.fieldId(), Types.Field.get(field.fieldId(), nullable, update.name(), update.type(), update.doc()));
      }
      return this;
    }

    public Map<Integer, ArrayList<TableChange.ColumnPositionChange>> getPositionChangeMap() {
      return positionChangeMap;
    }

    @Override
    public ColumnChangeID columnChangeId() {
      return ColumnChangeID.UPDATE;
    }

    @Override
    protected Integer findIdByFullName(String fullName) {
      Types.Field field = internalSchema.findField(fullName);
      if (field != null) {
        return field.fieldId();
      }
      throw new IllegalArgumentException(String.format("cannot find col id for given column fullName: %s", fullName));
    }
  }

  /** Deal with delete columns changes for table. */
  public static class ColumnDeleteChange extends TableChange.BaseColumnChange {
    // field ids scheduled for deletion
    private final Set<Integer> deletes = new HashSet<>();

    @Override
    public ColumnChangeID columnChangeId() {
      return ColumnChangeID.DELETE;
    }

    public static ColumnDeleteChange get(InternalSchema schema) {
      return new ColumnDeleteChange(schema);
    }

    private ColumnDeleteChange(InternalSchema schema) {
      super(schema);
    }

    @Override
    public boolean withPositionChange() {
      return false;
    }

    @Override
    public BaseColumnChange addPositionChange(String srcId, String dsrId, String orderType) {
      throw new UnsupportedOperationException("no support add position change for ColumnDeleteChange");
    }

    /**
     * Schedules a column for deletion.
     *
     * @param name full name of the column to delete
     * @return this
     * @throws IllegalArgumentException if the column is missing
     */
    public ColumnDeleteChange deleteColumn(String name) {
      checkColModifyIsLegal(name);
      Types.Field field = internalSchema.findField(name);
      if (field == null) {
        throw new IllegalArgumentException(String.format("cannot delete missing columns: %s", name));
      }
      deletes.add(field.fieldId());
      return this;
    }

    /**
     * Returns null when the field id is scheduled for deletion, otherwise the type unchanged.
     */
    public Type applyDelete(int id, Type type) {
      return deletes.contains(id) ? null : type;
    }

    public Set<Integer> getDeletes() {
      return deletes;
    }

    @Override
    protected Integer findIdByFullName(String fullName) {
      throw new UnsupportedOperationException("delete change cannot support this method");
    }
  }

  /** Deal with add columns changes for table. */
  public static class ColumnAddChange extends TableChange.BaseColumnChange {
    // fullName -> id assigned to the newly added column
    private final Map<String, Integer> fullColName2Id = new HashMap<>();
    // parent field id (-1 for root) -> the fields added under that parent
    private final Map<Integer, ArrayList<Types.Field>> parentId2AddCols = new HashMap<>();
    // next free column id; seeded from the schema's current max id
    private int nextId;

    public static ColumnAddChange get(InternalSchema internalSchema) {
      return new ColumnAddChange(internalSchema);
    }

    private ColumnAddChange(InternalSchema internalSchema) {
      super(internalSchema);
      this.nextId = internalSchema.getMaxColumnId() + 1;
    }

    /**
     * Applies the collected additions and re-orderings under one field.
     *
     * @param originalField the original field
     * @param type the (possibly already rewritten) type of that field
     * @return the type with the additions/re-orderings applied
     */
    public Type applyAdd(Types.Field originalField, Type type) {
      int fieldId = originalField.fieldId();
      ArrayList<Types.Field> addFields = parentId2AddCols.getOrDefault(fieldId, new ArrayList<>());
      ArrayList<TableChange.ColumnPositionChange> pchanges = positionChangeMap.getOrDefault(fieldId, new ArrayList<>());
      if (!addFields.isEmpty() || !pchanges.isEmpty()) {
        // additions were validated at registration time, so the cast to RecordType is safe
        List<Types.Field> newFields = TableChangesHelper.applyAddChange2Fields(((Types.RecordType) type).fields(), addFields, pchanges);
        return Types.RecordType.get(newFields);
      }
      return type;
    }

    /** Adds a top-level column; delegates to the parent-aware overload. */
    public ColumnAddChange addColumns(String name, Type type, String doc) {
      return addColumns("", name, type, doc);
    }

    public ColumnAddChange addColumns(String parent, String name, Type type, String doc) {
      checkColModifyIsLegal(name);
      addColumnsInternal(parent, name, type, doc);
      return this;
    }

    private void addColumnsInternal(String parent, String name, Type type, String doc) {
      // root record has no parent, so set parentId to -1 as default
      int parentId = -1;
      String fullName = name;
      if (!parent.isEmpty()) {
        Types.Field parentField = internalSchema.findField(parent);
        if (parentField == null) {
          throw new HoodieSchemaException(String.format("cannot add column: %s which parent: %s is not exist", name, parent));
        }
        if (!(parentField.type() instanceof Types.RecordType)) {
          throw new HoodieSchemaException("only support add nested columns to struct column");
        }
        parentId = parentField.fieldId();
        Types.Field existing = internalSchema.findField(parent + "." + name);
        if (existing != null) {
          throw new HoodieSchemaException(String.format("cannot add column: %s which already exist", name));
        }
        fullName = parent + "." + name;
      } else {
        // keep consistent with hive: column names are case-insensitive
        if (internalSchema.findDuplicateCol(name.toLowerCase(Locale.ROOT))) {
          throw new HoodieSchemaException(String.format("cannot add column: %s which already exist", name));
        }
      }
      if (fullColName2Id.containsKey(fullName)) {
        throw new HoodieSchemaException(String.format("cannot repeat add column: %s", name));
      }
      fullColName2Id.put(fullName, nextId);
      if (parentId != -1) {
        id2parent.put(nextId, parentId);
      }
      // assign fresh ids to the new column and all of its nested children
      AtomicInteger assignNextId = new AtomicInteger(nextId + 1);
      Type typeWithNewId = InternalSchemaBuilder.getBuilder().refreshNewId(type, assignNextId);
      // only allow add optional columns.
      ArrayList<Types.Field> adds = parentId2AddCols.getOrDefault(parentId, new ArrayList<>());
      adds.add(Types.Field.get(nextId, true, name, typeWithNewId, doc));
      parentId2AddCols.put(parentId, adds);
      nextId = assignNextId.get();
    }

    public Map<Integer, ArrayList<Types.Field>> getParentId2AddCols() {
      return parentId2AddCols;
    }

    public Map<Integer, ArrayList<TableChange.ColumnPositionChange>> getPositionChangeMap() {
      return positionChangeMap;
    }

    // expose to test
    public Map<String, Integer> getFullColName2Id() {
      return fullColName2Id;
    }

    @Override
    protected Integer findIdByFullName(String fullName) {
      Types.Field field = internalSchema.findField(fullName);
      if (field != null) {
        return field.fieldId();
      }
      return fullColName2Id.getOrDefault(fullName, -1);
    }

    @Override
    public ColumnChangeID columnChangeId() {
      return ColumnChangeID.ADD;
    }

    @Override
    public boolean withPositionChange() {
      return true;
    }
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.internal.schema.action;

import org.apache.hudi.internal.schema.Types;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

/**
 * Helper class to support Table schema changes.
 */
public class TableChangesHelper {

  /**
   * Apply add operation and column position change operation.
   *
   * @param fields origin column fields.
   * @param adds column fields to be added.
   * @param pchanges a wrapper class hold all the position change operations.
   * @return column fields after adjusting the position.
   */
  public static List<Types.Field> applyAddChange2Fields(List<Types.Field> fields, ArrayList<Types.Field> adds, ArrayList<TableChange.ColumnPositionChange> pchanges) {
    if (adds == null && pchanges == null) {
      return fields;
    }
    LinkedList<Types.Field> result = new LinkedList<>(fields);
    // apply add columns: new fields go to the end before any re-ordering
    if (adds != null && !adds.isEmpty()) {
      result.addAll(adds);
    }
    // apply position changes in registration order
    if (pchanges != null && !pchanges.isEmpty()) {
      for (TableChange.ColumnPositionChange pchange : pchanges) {
        applyPositionChange(result, pchange);
      }
    }
    return result;
  }

  /**
   * Moves one field inside the list according to a single position change.
   * NOTE(review): if the anchor (dsr) id is absent, indexOf(null) yields -1, so
   * BEFORE throws IndexOutOfBoundsException and AFTER silently moves the field to
   * the head; a missing src id throws NoSuchElementException — confirm intended.
   */
  private static void applyPositionChange(LinkedList<Types.Field> fields, TableChange.ColumnPositionChange pchange) {
    Types.Field srcField = fields.stream().filter(f -> f.fieldId() == pchange.getSrcId()).findFirst().get();
    Types.Field dsrField = fields.stream().filter(f -> f.fieldId() == pchange.getDsrId()).findFirst().orElse(null);
    // remove srcField first so the anchor index is computed on the remaining list
    fields.remove(srcField);
    switch (pchange.type()) {
      case AFTER:
        // add srcField after dsrField
        fields.add(fields.indexOf(dsrField) + 1, srcField);
        break;
      case BEFORE:
        // add srcField before dsrField
        fields.add(fields.indexOf(dsrField), srcField);
        break;
      case FIRST:
        fields.addFirst(srcField);
        break;
      default:
        // NO_OPERATION: nothing to do
        break;
    }
  }

  /**
   * Returns the parent path of a dotted column name, or "" for a top-level column.
   */
  public static String getParentName(String fullColName) {
    int lastDot = fullColName.lastIndexOf(".");
    return lastDot > 0 ? fullColName.substring(0, lastDot) : "";
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.internal.schema.convert;

import org.apache.avro.JsonProperties;
import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.hudi.internal.schema.HoodieSchemaException;
import org.apache.hudi.internal.schema.InternalSchema;
import org.apache.hudi.internal.schema.Type;
import org.apache.hudi.internal.schema.Types;

import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import static org.apache.avro.Schema.Type.UNION;

/**
 * Auxiliary class.
 * Converts an avro schema into InternalSchema, or converts an InternalSchema to an avro schema.
 */
public class AvroInternalSchemaConverter {

  /**
   * Convert internalSchema to avro Schema.
   *
   * @param internalSchema internal schema.
   * @param tableName the record name.
   * @return an avro Schema.
   */
  public static Schema convert(InternalSchema internalSchema, String tableName) {
    return buildAvroSchemaFromInternalSchema(internalSchema, tableName);
  }

  /**
   * Convert RecordType to avro Schema.
   *
   * @param type internal record type.
   * @param name the record name.
   * @return an avro Schema.
   */
  public static Schema convert(Types.RecordType type, String name) {
    return buildAvroSchemaFromType(type, name);
  }

  /**
   * Convert internal type to avro Schema.
   *
   * @param type internal type.
   * @param name the record name.
   * @return an avro Schema.
   */
  public static Schema convert(Type type, String name) {
    return buildAvroSchemaFromType(type, name);
  }

  /** Convert an avro schema into internal type. */
  public static Type convertToField(Schema schema) {
    return buildTypeFromAvroSchema(schema);
  }

  /** Convert an avro schema into internalSchema. */
  public static InternalSchema convert(Schema schema) {
    List<Types.Field> fields = ((Types.RecordType) convertToField(schema)).fields();
    return new InternalSchema(fields);
  }

  /** Check whether the given avro schema is a nullable two-branch union. */
  public static boolean isOptional(Schema schema) {
    if (schema.getType() == UNION && schema.getTypes().size() == 2) {
      return schema.getTypes().get(0).getType() == Schema.Type.NULL
          || schema.getTypes().get(1).getType() == Schema.Type.NULL;
    }
    return false;
  }

  /** Returns schema with nullable true. */
  public static Schema nullableSchema(Schema schema) {
    if (schema.getType() == UNION) {
      if (!isOptional(schema)) {
        throw new HoodieSchemaException(String.format("Union schemas are not supported: %s", schema));
      }
      return schema;
    }
    return Schema.createUnion(Schema.create(Schema.Type.NULL), schema);
  }

  /**
   * Build hudi type from avro schema.
   *
   * @param schema an avro schema.
   * @return a hudi type.
   */
  public static Type buildTypeFromAvroSchema(Schema schema) {
    // track visited record names to detect recursion
    Deque<String> visited = new LinkedList<>();
    AtomicInteger nextId = new AtomicInteger(1);
    return visitAvroSchemaToBuildType(schema, visited, true, nextId);
  }

  /**
   * Converts an avro schema into hudi type.
   *
   * @param schema an avro schema.
   * @param visited record names on the current traversal path; used to reject recursive records.
   * @param firstVisitRoot whether the current node is the root record (root field ids start at 0).
   * @param nextId id counter used to assign ids to all fields.
   * @return a hudi type matching the avro schema.
   */
  private static Type visitAvroSchemaToBuildType(Schema schema, Deque<String> visited, Boolean firstVisitRoot, AtomicInteger nextId) {
    switch (schema.getType()) {
      case RECORD:
        String name = schema.getFullName();
        if (visited.contains(name)) {
          throw new HoodieSchemaException(String.format("cannot convert recursive avro record %s", name));
        }
        visited.push(name);
        List<Schema.Field> fields = schema.getFields();
        List<Type> fieldTypes = new ArrayList<>(fields.size());
        // reserve a contiguous id block for this record's direct fields before recursing,
        // so nested fields get ids after their parent's siblings
        int nextAssignId = firstVisitRoot ? 0 : nextId.get();
        nextId.set(nextAssignId + fields.size());
        for (Schema.Field field : fields) {
          fieldTypes.add(visitAvroSchemaToBuildType(field.schema(), visited, false, nextId));
        }
        visited.pop();
        List<Types.Field> internalFields = new ArrayList<>(fields.size());
        for (int i = 0; i < fields.size(); i++) {
          Schema.Field field = fields.get(i);
          internalFields.add(Types.Field.get(nextAssignId, AvroInternalSchemaConverter.isOptional(field.schema()), field.name(), fieldTypes.get(i), field.doc()));
          nextAssignId += 1;
        }
        return Types.RecordType.get(internalFields);
      case UNION:
        // nullable unions convert the NULL branch to null; return the non-null branch
        List<Type> unionTypes = new ArrayList<>();
        for (Schema branch : schema.getTypes()) {
          unionTypes.add(visitAvroSchemaToBuildType(branch, visited, false, nextId));
        }
        return unionTypes.get(0) == null ? unionTypes.get(1) : unionTypes.get(0);
      case ARRAY:
        Schema elementSchema = schema.getElementType();
        int elementId = nextId.get();
        nextId.set(elementId + 1);
        Type elementType = visitAvroSchemaToBuildType(elementSchema, visited, false, nextId);
        return Types.ArrayType.get(elementId, AvroInternalSchemaConverter.isOptional(schema.getElementType()), elementType);
      case MAP:
        int keyId = nextId.get();
        int valueId = keyId + 1;
        nextId.set(valueId + 1);
        Type valueType = visitAvroSchemaToBuildType(schema.getValueType(), visited, false, nextId);
        return Types.MapType.get(keyId, valueId, Types.StringType.get(), valueType, AvroInternalSchemaConverter.isOptional(schema.getValueType()));
      default:
        return visitAvroPrimitiveToBuildInternalType(schema);
    }
  }

  /** Converts an avro primitive (possibly logical-typed) schema into a hudi primitive type. */
  private static Type visitAvroPrimitiveToBuildInternalType(Schema primitive) {
    LogicalType logical = primitive.getLogicalType();
    if (logical != null) {
      String name = logical.getName();
      if (logical instanceof LogicalTypes.Decimal) {
        return Types.DecimalType.get(
            ((LogicalTypes.Decimal) logical).getPrecision(),
            ((LogicalTypes.Decimal) logical).getScale());
      } else if (logical instanceof LogicalTypes.Date) {
        return Types.DateType.get();
      } else if (logical instanceof LogicalTypes.TimeMillis
          || logical instanceof LogicalTypes.TimeMicros) {
        return Types.TimeType.get();
      } else if (logical instanceof LogicalTypes.TimestampMillis
          || logical instanceof LogicalTypes.TimestampMicros) {
        return Types.TimestampType.get();
      } else if (LogicalTypes.uuid().getName().equals(name)) {
        return Types.UUIDType.get();
      }
    }

    switch (primitive.getType()) {
      case BOOLEAN:
        return Types.BooleanType.get();
      case INT:
        return Types.IntType.get();
      case LONG:
        return Types.LongType.get();
      case FLOAT:
        return Types.FloatType.get();
      case DOUBLE:
        return Types.DoubleType.get();
      case STRING:
      case ENUM:
        return Types.StringType.get();
      case FIXED:
        return Types.FixedType.getFixed(primitive.getFixedSize());
      case BYTES:
        return Types.BinaryType.get();
      case NULL:
        // a NULL branch of a union converts to null and is dropped by the caller
        return null;
      default:
        throw new UnsupportedOperationException("Unsupported primitive type: " + primitive);
    }
  }

  /**
   * Converts hudi type into an Avro Schema.
   *
   * @param type a hudi type.
   * @param recordName the record name.
   * @return an Avro schema matching this type.
   */
  public static Schema buildAvroSchemaFromType(Type type, String recordName) {
    Map<Type, Schema> cache = new HashMap<>();
    return visitInternalSchemaToBuildAvroSchema(type, cache, recordName);
  }

  /**
   * Converts hudi internal Schema into an Avro Schema.
   *
   * @param schema a hudi internal Schema.
   * @param recordName the record name.
   * @return an Avro schema matching the hudi internal schema.
   */
  public static Schema buildAvroSchemaFromInternalSchema(InternalSchema schema, String recordName) {
    Map<Type, Schema> cache = new HashMap<>();
    return visitInternalSchemaToBuildAvroSchema(schema.getRecord(), cache, recordName);
  }

  /**
   * Converts hudi type into an Avro Schema.
   * The cache lookup happens before recursing into children, so an already
   * converted nested type is never recomputed.
   *
   * @param type a hudi type.
   * @param cache caches intermediate conversion results to save cost.
   * @param recordName the record name.
   * @return an Avro schema matching this type.
   */
  private static Schema visitInternalSchemaToBuildAvroSchema(Type type, Map<Type, Schema> cache, String recordName) {
    switch (type.typeId()) {
      case RECORD:
        Types.RecordType record = (Types.RecordType) type;
        Schema cachedRecord = cache.get(record);
        if (cachedRecord != null) {
          return cachedRecord;
        }
        List<Schema> fieldSchemas = new ArrayList<>();
        for (Types.Field f : record.fields()) {
          Schema fieldSchema = visitInternalSchemaToBuildAvroSchema(f.type(), cache, recordName + "_" + f.name());
          fieldSchemas.add(f.isOptional() ? AvroInternalSchemaConverter.nullableSchema(fieldSchema) : fieldSchema);
        }
        Schema recordSchema = visitInternalRecordToBuildAvroRecord(record, fieldSchemas, recordName);
        cache.put(record, recordSchema);
        return recordSchema;
      case ARRAY:
        Types.ArrayType array = (Types.ArrayType) type;
        Schema cachedArray = cache.get(array);
        if (cachedArray != null) {
          return cachedArray;
        }
        Schema elementSchema = visitInternalSchemaToBuildAvroSchema(array.elementType(), cache, recordName);
        Schema arraySchema = visitInternalArrayToBuildAvroArray(array, elementSchema);
        cache.put(array, arraySchema);
        return arraySchema;
      case MAP:
        Types.MapType map = (Types.MapType) type;
        Schema cachedMap = cache.get(map);
        if (cachedMap != null) {
          return cachedMap;
        }
        Schema keySchema = visitInternalSchemaToBuildAvroSchema(map.keyType(), cache, recordName);
        Schema valueSchema = visitInternalSchemaToBuildAvroSchema(map.valueType(), cache, recordName);
        Schema mapSchema = visitInternalMapToBuildAvroMap(map, keySchema, valueSchema);
        cache.put(map, mapSchema);
        return mapSchema;
      default:
        Schema primitiveSchema = visitInternalPrimitiveToBuildAvroPrimitiveType((Type.PrimitiveType) type);
        cache.put(type, primitiveSchema);
        return primitiveSchema;
    }
  }

  /**
   * Converts hudi RecordType to Avro RecordType.
   * Auxiliary function used by visitInternalSchemaToBuildAvroSchema.
   */
  private static Schema visitInternalRecordToBuildAvroRecord(Types.RecordType record, List<Schema> fieldSchemas, String recordName) {
    List<Types.Field> fields = record.fields();
    List<Schema.Field> avroFields = new ArrayList<>();
    for (int i = 0; i < fields.size(); i++) {
      Types.Field f = fields.get(i);
      // optional fields get a null default so readers can omit them
      Schema.Field field = new Schema.Field(f.name(), fieldSchemas.get(i), f.doc(), f.isOptional() ? JsonProperties.NULL_VALUE : null);
      avroFields.add(field);
    }
    return Schema.createRecord(recordName, null, null, false, avroFields);
  }

  /**
   * Converts hudi ArrayType to Avro ArrayType.
   * Auxiliary function used by visitInternalSchemaToBuildAvroSchema.
   */
  private static Schema visitInternalArrayToBuildAvroArray(Types.ArrayType array, Schema elementSchema) {
    return array.isElementOptional()
        ? Schema.createArray(AvroInternalSchemaConverter.nullableSchema(elementSchema))
        : Schema.createArray(elementSchema);
  }

  /**
   * Converts hudi MapType to Avro MapType.
   * Auxiliary function used by visitInternalSchemaToBuildAvroSchema.
   */
  private static Schema visitInternalMapToBuildAvroMap(Types.MapType map, Schema keySchema, Schema valueSchema) {
    if (keySchema.getType() != Schema.Type.STRING) {
      throw new HoodieSchemaException("only support StringType key for avro MapType");
    }
    return Schema.createMap(map.isValueOptional() ? AvroInternalSchemaConverter.nullableSchema(valueSchema) : valueSchema);
  }

  /**
   * Converts hudi PrimitiveType to Avro PrimitiveType.
   * Auxiliary function used by visitInternalSchemaToBuildAvroSchema.
   */
  private static Schema visitInternalPrimitiveToBuildAvroPrimitiveType(Type.PrimitiveType primitive) {
    switch (primitive.typeId()) {
      case BOOLEAN:
        return Schema.create(Schema.Type.BOOLEAN);
      case INT:
        return Schema.create(Schema.Type.INT);
      case LONG:
        return Schema.create(Schema.Type.LONG);
      case FLOAT:
        return Schema.create(Schema.Type.FLOAT);
      case DOUBLE:
        return Schema.create(Schema.Type.DOUBLE);
      case DATE:
        return LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT));
      case TIME:
        return LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG));
      case TIMESTAMP:
        return LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG));
      case STRING:
        return Schema.create(Schema.Type.STRING);
      case UUID:
        return LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16));
      case FIXED:
        Types.FixedType fixed = (Types.FixedType) primitive;
        return Schema.createFixed("fixed_" + fixed.getFixedSize(), null, null, fixed.getFixedSize());
      case BINARY:
        return Schema.create(Schema.Type.BYTES);
      case DECIMAL:
        Types.DecimalType decimal = (Types.DecimalType) primitive;
        return LogicalTypes.decimal(decimal.precision(), decimal.scale())
            .addToSchema(Schema.createFixed(
                "decimal_" + decimal.precision() + "_" + decimal.scale(),
                null, null, computeMinBytesForPrecision(decimal.precision())));
      default:
        throw new UnsupportedOperationException("Unsupported type ID: " + primitive.typeId());
    }
  }

  /**
   * Return the minimum number of bytes needed to store a decimal with a given 'precision'.
   * Reference from Spark release 3.1.
   */
  private static int computeMinBytesForPrecision(int precision) {
    int numBytes = 1;
    while (Math.pow(2.0, 8 * numBytes - 1) < Math.pow(10.0, precision)) {
      numBytes += 1;
    }
    return numBytes;
  }
}
+ * Using given validCommits to validate all legal history schema files, and return the latest one.
+ * If the passed valid commits is null or empty, valid instants will be fetched from the file-system and used.
+ */
+ public abstract String getHistorySchemaStrByGivenValidCommits(List validCommits);
+
+ /**
+ * Get internalSchema by using the given versionId.
+ *
+ * @param versionId the schema version_id to search for
+ * @return internalSchema
+ */
+ public abstract Option getSchemaByKey(String versionId);
+}
diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java
new file mode 100644
index 0000000000000..6cca0728a8312
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hudi.internal.schema.io; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.util.FileIOUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.utils.InternalSchemaUtils; +import org.apache.hudi.internal.schema.utils.SerDeHelper; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.TreeMap; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.table.timeline.HoodieTimeline.SCHEMA_COMMIT_ACTION; + +public class FileBasedInternalSchemaStorageManager extends AbstractInternalSchemaStorageManager { + private static final Logger LOG = LogManager.getLogger(FileBasedInternalSchemaStorageManager.class); + + public static final String SCHEMA_NAME = ".schema"; + private final Path baseSchemaPath; + private final Configuration conf; + private HoodieTableMetaClient metaClient; + + public FileBasedInternalSchemaStorageManager(Configuration conf, Path baseTablePath) { + Path metaPath = new Path(baseTablePath, ".hoodie"); + this.baseSchemaPath = new Path(metaPath, SCHEMA_NAME); + this.conf = conf; + } + + public FileBasedInternalSchemaStorageManager(HoodieTableMetaClient metaClient) { + Path metaPath = new Path(metaClient.getBasePath(), ".hoodie"); + this.baseSchemaPath = new 
Path(metaPath, SCHEMA_NAME); + this.conf = metaClient.getHadoopConf(); + this.metaClient = metaClient; + } + + // make metaClient build lazy + private HoodieTableMetaClient getMetaClient() { + if (metaClient == null) { + metaClient = HoodieTableMetaClient.builder().setBasePath(baseSchemaPath.getParent().getParent().toString()).setConf(conf).build(); + } + return metaClient; + } + + @Override + public void persistHistorySchemaStr(String instantTime, String historySchemaStr) { + cleanResidualFiles(); + HoodieActiveTimeline timeline = getMetaClient().getActiveTimeline(); + HoodieInstant hoodieInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, SCHEMA_COMMIT_ACTION, instantTime); + timeline.createNewInstant(hoodieInstant); + byte[] writeContent = historySchemaStr.getBytes(StandardCharsets.UTF_8); + timeline.transitionRequestedToInflight(hoodieInstant, Option.empty()); + timeline.saveAsComplete(new HoodieInstant(HoodieInstant.State.INFLIGHT, hoodieInstant.getAction(), hoodieInstant.getTimestamp()), Option.of(writeContent)); + LOG.info(String.format("persist history schema success on commit time: %s", instantTime)); + } + + private void cleanResidualFiles() { + List validateCommits = getValidInstants(); + try { + FileSystem fs = baseSchemaPath.getFileSystem(conf); + if (fs.exists(baseSchemaPath)) { + List candidateSchemaFiles = Arrays.stream(fs.listStatus(baseSchemaPath)).filter(f -> f.isFile()) + .map(file -> file.getPath().getName()).collect(Collectors.toList()); + List residualSchemaFiles = candidateSchemaFiles.stream().filter(f -> !validateCommits.contains(f.split("\\.")[0])).collect(Collectors.toList()); + // clean residual files + residualSchemaFiles.forEach(f -> { + try { + fs.delete(new Path(getMetaClient().getSchemaFolderName(), f)); + } catch (IOException o) { + throw new HoodieException(o); + } + }); + } + } catch (IOException e) { + throw new HoodieException(e); + } + } + + public void cleanOldFiles(List validateCommits) { + try { + FileSystem fs = 
baseSchemaPath.getFileSystem(conf); + if (fs.exists(baseSchemaPath)) { + List candidateSchemaFiles = Arrays.stream(fs.listStatus(baseSchemaPath)).filter(f -> f.isFile()) + .map(file -> file.getPath().getName()).collect(Collectors.toList()); + List validateSchemaFiles = candidateSchemaFiles.stream().filter(f -> validateCommits.contains(f.split("\\.")[0])).collect(Collectors.toList()); + for (int i = 0; i < validateSchemaFiles.size(); i++) { + fs.delete(new Path(validateSchemaFiles.get(i))); + } + } + } catch (IOException e) { + throw new HoodieException(e); + } + } + + private List getValidInstants() { + return getMetaClient().getCommitsTimeline() + .filterCompletedInstants().getInstants().map(f -> f.getTimestamp()).collect(Collectors.toList()); + } + + @Override + public String getHistorySchemaStr() { + return getHistorySchemaStrByGivenValidCommits(Collections.EMPTY_LIST); + } + + @Override + public String getHistorySchemaStrByGivenValidCommits(List validCommits) { + List commitList = validCommits == null || validCommits.isEmpty() ? 
getValidInstants() : validCommits; + try { + FileSystem fs = FSUtils.getFs(baseSchemaPath.toString(), conf); + if (fs.exists(baseSchemaPath)) { + List validaSchemaFiles = Arrays.stream(fs.listStatus(baseSchemaPath)) + .filter(f -> f.isFile() && f.getPath().getName().endsWith(SCHEMA_COMMIT_ACTION)) + .map(file -> file.getPath().getName()).filter(f -> commitList.contains(f.split("\\.")[0])).sorted().collect(Collectors.toList()); + if (!validaSchemaFiles.isEmpty()) { + Path latestFilePath = new Path(baseSchemaPath, validaSchemaFiles.get(validaSchemaFiles.size() - 1)); + byte[] content; + try (FSDataInputStream is = fs.open(latestFilePath)) { + content = FileIOUtils.readAsByteArray(is); + LOG.info(String.format("read history schema success from file : %s", latestFilePath)); + return new String(content, StandardCharsets.UTF_8); + } catch (IOException e) { + throw new HoodieIOException("Could not read history schema from " + latestFilePath, e); + } + } + } + } catch (IOException io) { + throw new HoodieException(io); + } + LOG.info("failed to read history schema"); + return ""; + } + + @Override + public Option getSchemaByKey(String versionId) { + String historySchemaStr = getHistorySchemaStr(); + TreeMap treeMap; + if (historySchemaStr.isEmpty()) { + return Option.empty(); + } else { + treeMap = SerDeHelper.parseSchemas(historySchemaStr); + InternalSchema result = InternalSchemaUtils.searchSchema(Long.valueOf(versionId), treeMap); + if (result == null) { + return Option.empty(); + } + return Option.of(result); + } + } +} + + diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java new file mode 100644 index 0000000000000..e57fce4357b25 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.utils; + +import org.apache.avro.Schema; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Types; +import org.apache.hudi.internal.schema.action.TableChanges; +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter; + +import java.util.ArrayList; +import java.util.List; +import java.util.TreeMap; +import java.util.stream.Collectors; + +/** + * Utility methods to support evolve old avro schema based on a given schema. + */ +public class AvroSchemaEvolutionUtils { + /** + * Support evolution from a new avroSchema. + * Now hoodie support implicitly add columns when hoodie write operation, + * This ability needs to be preserved, so implicitly evolution for internalSchema should supported. 
+ * + * @param evolvedSchema implicitly evolution of avro when hoodie write operation + * @param oldSchema old internalSchema + * @param supportPositionReorder support position reorder + * @return evolution Schema + */ + public static InternalSchema evolveSchemaFromNewAvroSchema(Schema evolvedSchema, InternalSchema oldSchema, Boolean supportPositionReorder) { + InternalSchema evolvedInternalSchema = AvroInternalSchemaConverter.convert(evolvedSchema); + // do check, only support add column evolution + List colNamesFromEvolved = evolvedInternalSchema.getAllColsFullName(); + List colNamesFromOldSchema = oldSchema.getAllColsFullName(); + List diffFromOldSchema = colNamesFromOldSchema.stream().filter(f -> !colNamesFromEvolved.contains(f)).collect(Collectors.toList()); + List newFields = new ArrayList<>(); + if (colNamesFromEvolved.size() == colNamesFromOldSchema.size() && diffFromOldSchema.size() == 0) { + // no changes happen + if (supportPositionReorder) { + evolvedInternalSchema.getRecord().fields().forEach(f -> newFields.add(oldSchema.getRecord().field(f.name()))); + return new InternalSchema(newFields); + } + return oldSchema; + } + // try to find all added columns + if (diffFromOldSchema.size() != 0) { + throw new UnsupportedOperationException("Cannot evolve schema implicitly, find delete/rename operation"); + } + + List diffFromEvolutionSchema = colNamesFromEvolved.stream().filter(f -> !colNamesFromOldSchema.contains(f)).collect(Collectors.toList()); + // Remove redundancy from diffFromEvolutionSchema. + // for example, now we add a struct col in evolvedSchema, the struct col is " user struct " + // when we do diff operation: user, user.name, user.age will appeared in the resultSet which is redundancy, user.name and user.age should be excluded. 
+ // deal with add operation
+ TreeMap finalAddAction = new TreeMap<>();
+ for (int i = 0; i < diffFromEvolutionSchema.size(); i++) {
+ String name = diffFromEvolutionSchema.get(i);
+ int splitPoint = name.lastIndexOf(".");
+ String parentName = splitPoint > 0 ? name.substring(0, splitPoint) : "";
+ if (!parentName.isEmpty() && diffFromEvolutionSchema.contains(parentName)) {
+ // redundant entry: its parent is also newly added, so skip it
+ continue;
+ }
+ finalAddAction.put(evolvedInternalSchema.findIdByName(name), name);
+ }
+
+ TableChanges.ColumnAddChange addChange = TableChanges.ColumnAddChange.get(oldSchema);
+ finalAddAction.entrySet().stream().forEach(f -> {
+ String name = f.getValue();
+ int splitPoint = name.lastIndexOf(".");
+ String parentName = splitPoint > 0 ? name.substring(0, splitPoint) : "";
+ String rawName = splitPoint > 0 ? name.substring(splitPoint + 1) : name;
+ addChange.addColumns(parentName, rawName, evolvedInternalSchema.findType(name), null);
+ });
+
+ InternalSchema res = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, addChange);
+ if (supportPositionReorder) {
+ evolvedInternalSchema.getRecord().fields().forEach(f -> newFields.add(oldSchema.getRecord().field(f.name())));
+ return new InternalSchema(newFields);
+ } else {
+ return res;
+ }
+ }
+
+ public static InternalSchema evolveSchemaFromNewAvroSchema(Schema evolvedSchema, InternalSchema oldSchema) {
+ return evolveSchemaFromNewAvroSchema(evolvedSchema, oldSchema, false);
+ }
+
+ /**
+ * Canonicalize the nullability.
+ * Do not allow changing a column's nullability from optional to required.
+ * If the above problem occurs, try to correct it.
+ *
+ * @param writeSchema the write schema hoodie used to write data.
+ * @param readSchema read schema + * @return canonical Schema + */ + public static Schema canonicalizeColumnNullability(Schema writeSchema, Schema readSchema) { + if (writeSchema.getFields().isEmpty() || readSchema.getFields().isEmpty()) { + return writeSchema; + } + InternalSchema writeInternalSchema = AvroInternalSchemaConverter.convert(writeSchema); + InternalSchema readInternalSchema = AvroInternalSchemaConverter.convert(readSchema); + List colNamesWriteSchema = writeInternalSchema.getAllColsFullName(); + List colNamesFromReadSchema = readInternalSchema.getAllColsFullName(); + // try to deal with optional change. now when we use sparksql to update hudi table, + // sparksql Will change the col type from optional to required, this is a bug. + List candidateUpdateCols = colNamesWriteSchema.stream().filter(f -> { + boolean exist = colNamesFromReadSchema.contains(f); + if (exist && (writeInternalSchema.findField(f).isOptional() != readInternalSchema.findField(f).isOptional())) { + return true; + } else { + return false; + } + }).collect(Collectors.toList()); + if (candidateUpdateCols.isEmpty()) { + return writeSchema; + } + // try to correct all changes + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(writeInternalSchema); + candidateUpdateCols.stream().forEach(f -> updateChange.updateColumnNullability(f, true)); + Schema result = AvroInternalSchemaConverter.convert(SchemaChangeUtils.applyTableChanges2Schema(writeInternalSchema, updateChange), writeSchema.getName()); + return result; + } +} + diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/InternalSchemaUtils.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/InternalSchemaUtils.java new file mode 100644 index 0000000000000..3c0877f6f58d4 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/InternalSchemaUtils.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + 
* or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.utils; + +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.internal.schema.HoodieSchemaException; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Type; +import org.apache.hudi.internal.schema.Types; +import org.apache.hudi.internal.schema.Types.Field; + +import java.util.ArrayList; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.SortedMap; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Util methods to help us do some operations on InternalSchema. + * eg: column prune, filter rebuild for query engine... + */ +public class InternalSchemaUtils { + + private InternalSchemaUtils() { + } + + /** + * Create project internalSchema, based on the project names which produced by query engine. + * support nested project. + * + * @param schema a internal schema. + * @param names project names produced by query engine. + * @return a project internalSchema. 
+ */ + public static InternalSchema pruneInternalSchema(InternalSchema schema, List names) { + // do check + List prunedIds = names.stream().map(name -> { + int id = schema.findIdByName(name); + if (id == -1) { + throw new IllegalArgumentException(String.format("cannot prune col: %s which not exisit in hudi table", name)); + } + return id; + }).collect(Collectors.toList()); + // find top parent field ID. eg: a.b.c, f.g.h, only collect id of a and f ignore all child field. + List topParentFieldIds = new ArrayList<>(); + names.stream().forEach(f -> { + int id = schema.findIdByName(f.split("\\.")[0]); + if (!topParentFieldIds.contains(id)) { + topParentFieldIds.add(id); + } + }); + return pruneInternalSchemaByID(schema, prunedIds, topParentFieldIds); + } + + /** + * Create project internalSchema. + * support nested project. + * + * @param schema a internal schema. + * @param fieldIds project col field_ids. + * @return a project internalSchema. + */ + public static InternalSchema pruneInternalSchemaByID(InternalSchema schema, List fieldIds, List topParentFieldIds) { + Types.RecordType recordType = (Types.RecordType)pruneType(schema.getRecord(), fieldIds); + // reorder top parent fields, since the recordType.fields() produced by pruneType maybe out of order. + List newFields = new ArrayList<>(); + if (topParentFieldIds != null && !topParentFieldIds.isEmpty()) { + for (int id : topParentFieldIds) { + Types.Field f = recordType.field(id); + if (f != null) { + newFields.add(f); + } else { + throw new HoodieSchemaException(String.format("cannot find pruned id %s in currentSchema %s", id, schema.toString())); + } + } + } + return new InternalSchema(newFields.isEmpty() ? recordType.fields() : newFields); + } + + /** + * Project hudi type by projected cols field_ids + * this is auxiliary function used by pruneInternalSchema. 
+ */ + private static Type pruneType(Type type, List fieldIds) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType) type; + List fields = record.fields(); + List newTypes = new ArrayList<>(); + for (Types.Field f : fields) { + Type newType = pruneType(f.type(), fieldIds); + if (fieldIds.contains(f.fieldId())) { + newTypes.add(f.type()); + } else if (newType != null) { + newTypes.add(newType); + } else { + newTypes.add(null); + } + } + boolean changed = false; + List newFields = new ArrayList<>(); + for (int i = 0; i < fields.size(); i++) { + Types.Field oldField = fields.get(i); + Type newType = newTypes.get(i); + if (oldField.type() == newType) { + newFields.add(oldField); + } else if (newType != null) { + changed = true; + newFields.add(Types.Field.get(oldField.fieldId(), oldField.isOptional(), oldField.name(), newType, oldField.doc())); + } + } + if (newFields.isEmpty()) { + return null; + } + if (newFields.size() == fields.size() && !changed) { + return record; + } else { + return Types.RecordType.get(newFields); + } + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + Type newElementType = pruneType(array.elementType(), fieldIds); + if (fieldIds.contains(array.elementId())) { + return array; + } else if (newElementType != null) { + if (array.elementType() == newElementType) { + return array; + } + return Types.ArrayType.get(array.elementId(), array.isElementOptional(), newElementType); + } + return null; + case MAP: + Types.MapType map = (Types.MapType) type; + Type newValueType = pruneType(map.valueType(), fieldIds); + if (fieldIds.contains(map.valueId())) { + return map; + } else if (newValueType != null) { + if (map.valueType() == newValueType) { + return map; + } + return Types.MapType.get(map.keyId(), map.valueId(), map.keyType(), newValueType, map.isValueOptional()); + } + return null; + default: + return null; + } + } + + /** + * A helper function to help correct the colName of pushed filters. 
+ * + * @param name origin col name from pushed filters. + * @param fileSchema the real schema of avro/parquet file. + * @param querySchema the query schema which query engine produced. + * @return a corrected name. + */ + public static String reBuildFilterName(String name, InternalSchema fileSchema, InternalSchema querySchema) { + int nameId = querySchema.findIdByName(name); + if (nameId == -1) { + throw new IllegalArgumentException(String.format("cannot found filter col name:%s from querySchema: %s", name, querySchema)); + } + if (fileSchema.findField(nameId) == null) { + // added operation found + // the read file does not contain current col, so current colFilter is invalid + return ""; + } else { + if (name.equals(fileSchema.findfullName(nameId))) { + // no change happened on current col + return name; + } else { + // find rename operation on current col + // return the name from fileSchema + return fileSchema.findfullName(nameId); + } + } + } + + /** + * Collect all type changed cols to build a colPosition -> (newColType, oldColType) map. + * only collect top level col changed. eg: a is a nest field(record(b int, d long), now a.b is changed from int to long, + * only a will be collected, a.b will excluded. + * + * @param schema a type changed internalSchema + * @param oldSchema an old internalSchema. + * @return a map. 
+ */ + public static Map> collectTypeChangedCols(InternalSchema schema, InternalSchema oldSchema) { + Set ids = schema.getAllIds(); + Set otherIds = oldSchema.getAllIds(); + Map> result = new HashMap<>(); + ids.stream().filter(f -> otherIds.contains(f)).forEach(f -> { + if (!schema.findType(f).equals(oldSchema.findType(f))) { + String[] fieldNameParts = schema.findfullName(f).split("\\."); + String[] otherFieldNameParts = oldSchema.findfullName(f).split("\\."); + String parentName = fieldNameParts[0]; + String otherParentName = otherFieldNameParts[0]; + if (fieldNameParts.length == otherFieldNameParts.length && schema.findIdByName(parentName) == oldSchema.findIdByName(otherParentName)) { + int index = schema.findIdByName(parentName); + int position = schema.getRecord().fields().stream().map(s -> s.fieldId()).collect(Collectors.toList()).indexOf(index); + if (!result.containsKey(position)) { + result.put(position, Pair.of(schema.findType(parentName), oldSchema.findType(otherParentName))); + } + } + } + }); + return result; + } + + /** + * Search target internalSchema by version number. + * + * @param versionId the internalSchema version to be search. + * @param internalSchemas internalSchemas to be searched. + * @return a internalSchema. + */ + public static InternalSchema searchSchema(long versionId, List internalSchemas) { + TreeMap treeMap = new TreeMap<>(); + internalSchemas.forEach(s -> treeMap.put(s.schemaId(), s)); + return searchSchema(versionId, treeMap); + } + + /** + * Search target internalSchema by version number. + * + * @param versionId the internalSchema version to be search. + * @param treeMap internalSchemas collections to be searched. + * @return a internalSchema. 
+ */ + public static InternalSchema searchSchema(long versionId, TreeMap treeMap) { + if (treeMap.containsKey(versionId)) { + return treeMap.get(versionId); + } else { + SortedMap headMap = treeMap.headMap(versionId); + if (!headMap.isEmpty()) { + return headMap.get(headMap.lastKey()); + } + } + return InternalSchema.getEmptyInternalSchema(); + } + + public static String createFullName(String name, Deque fieldNames) { + String result = name; + if (!fieldNames.isEmpty()) { + List parentNames = new ArrayList<>(); + fieldNames.descendingIterator().forEachRemaining(parentNames::add); + result = parentNames.stream().collect(Collectors.joining(".")) + "." + result; + } + return result; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SchemaChangeUtils.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SchemaChangeUtils.java new file mode 100644 index 0000000000000..d719008042021 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SchemaChangeUtils.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.internal.schema.utils; + +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Type; +import org.apache.hudi.internal.schema.Types; +import org.apache.hudi.internal.schema.action.TableChanges; +import org.apache.hudi.internal.schema.action.TableChangesHelper; + +import java.util.ArrayList; +import java.util.List; + +/** + * Helper methods for schema Change. + */ +public class SchemaChangeUtils { + private SchemaChangeUtils() { + + } + + /** + * Whether to allow the column type to be updated. + * now only support: + * int => long/float/double/string + * long => float/double/string + * float => double/String + * double => String/Decimal + * Decimal => Decimal/String + * String => date/decimal + * date => String + * TODO: support more type update. + * + * @param src origin column type. + * @param dsr new column type. + * @return whether to allow the column type to be updated. + */ + public static boolean isTypeUpdateAllow(Type src, Type dsr) { + if (src.isNestedType() || dsr.isNestedType()) { + throw new IllegalArgumentException("only support update primitive type"); + } + if (src.equals(dsr)) { + return true; + } + switch (src.typeId()) { + case INT: + return dsr == Types.LongType.get() || dsr == Types.FloatType.get() + || dsr == Types.DoubleType.get() || dsr == Types.StringType.get() || dsr.typeId() == Type.TypeID.DECIMAL; + case LONG: + return dsr == Types.FloatType.get() || dsr == Types.DoubleType.get() || dsr == Types.StringType.get() || dsr.typeId() == Type.TypeID.DECIMAL; + case FLOAT: + return dsr == Types.DoubleType.get() || dsr == Types.StringType.get() || dsr.typeId() == Type.TypeID.DECIMAL; + case DOUBLE: + return dsr == Types.StringType.get() || dsr.typeId() == Type.TypeID.DECIMAL; + case DATE: + return dsr == Types.StringType.get(); + case DECIMAL: + if (dsr.typeId() == Type.TypeID.DECIMAL) { + Types.DecimalType decimalSrc = (Types.DecimalType)src; + Types.DecimalType decimalDsr = 
(Types.DecimalType)dsr; + if (decimalDsr.isWiderThan(decimalSrc)) { + return true; + } + } else if (dsr.typeId() == Type.TypeID.STRING) { + return true; + } + break; + case STRING: + return dsr == Types.DateType.get() || dsr.typeId() == Type.TypeID.DECIMAL; + default: + return false; + } + return false; + } + + /** + * Apply all the DDL add operations to internalSchema to produce a new internalSchema. + * + * @param internalSchema origin internalSchema. + * @param adds a wrapper class for all the DDL add operations. + * @return a new internalSchema. + */ + public static InternalSchema applyTableChanges2Schema(InternalSchema internalSchema, TableChanges.ColumnAddChange adds) { + Types.RecordType newType = (Types.RecordType)applyTableChange2Type(internalSchema.getRecord(), adds); + // deal with root level changes + List newFields = TableChangesHelper.applyAddChange2Fields(newType.fields(), + adds.getParentId2AddCols().get(-1), adds.getPositionChangeMap().get(-1)); + return new InternalSchema(newFields); + } + + /** + * Apply all the DDL add operations to Type to produce a new internalSchema. + * do not call this method directly. expose this method only for UT. + * + * @param type origin hudi Type. + * @param adds a wrapper class for all the DDL add operations. + * @return a new internalSchema. + */ + public static Type applyTableChange2Type(Type type, TableChanges.ColumnAddChange adds) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType) type; + List newTypes = new ArrayList<>(); + for (Types.Field f : record.fields()) { + Type newType = applyTableChange2Type(f.type(), adds); + // try to apply add + newTypes.add(newType.isNestedType() ? 
adds.applyAdd(f, newType) : newType); + } + List newFields = new ArrayList<>(); + boolean hasChanged = false; + for (int i = 0; i < newTypes.size(); i++) { + Type newType = newTypes.get(i); + Types.Field oldfield = record.fields().get(i); + if (oldfield.type() == newType) { + newFields.add(oldfield); + } else { + hasChanged = true; + newFields.add(Types.Field.get(oldfield.fieldId(), oldfield.isOptional(), oldfield.name(), newType, oldfield.doc())); + } + } + return hasChanged ? Types.RecordType.get(newFields) : record; + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + Type newElementType; + Types.Field elementField = array.field(array.elementId()); + newElementType = applyTableChange2Type(array.elementType(), adds); + // try to apply add + newElementType = adds.applyAdd(elementField, newElementType); + if (newElementType == array.elementType()) { + return array; + } + return Types.ArrayType.get(array.elementId(), array.isElementOptional(), newElementType); + case MAP: + Types.MapType map = (Types.MapType) type; + Type newValueType; + Types.Field valueField = map.field(map.valueId()); + if (adds.getParentId2AddCols().containsKey(map.keyId())) { + throw new IllegalArgumentException("Cannot add fields to map keys: " + map); + } + newValueType = applyTableChange2Type(map.valueType(), adds); + // try to apply add + newValueType = adds.applyAdd(valueField, newValueType); + if (newValueType == map.valueType()) { + return map; + } + return Types.MapType.get(map.keyId(), map.valueId(), map.keyType(), newValueType, map.isValueOptional()); + default: + return type; + } + } + + /** + * Apply all the DDL delete operations to internalSchema to produce a new internalSchema. + * + * @param internalSchema origin internalSchema. + * @param deletes a wrapper class for all the DDL delete operations. + * @return a new internalSchema. 
+ */ + public static InternalSchema applyTableChanges2Schema(InternalSchema internalSchema, TableChanges.ColumnDeleteChange deletes) { + Types.RecordType newType = (Types.RecordType)applyTableChange2Type(internalSchema.getRecord(), deletes); + return new InternalSchema(newType.fields()); + } + + /** + * Apply all the DDL delete operations to Type to produce a new internalSchema. + * do not call this method directly. expose this method only for UT. + * + * @param type origin type. + * @param deletes a wrapper class for all the DDL delete operations. + * @return a new internalSchema. + */ + private static Type applyTableChange2Type(Type type, TableChanges.ColumnDeleteChange deletes) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType) type; + List fields = new ArrayList<>(); + for (Types.Field f : record.fields()) { + Type newType = applyTableChange2Type(f.type(), deletes); + // apply delete + newType = deletes.applyDelete(f.fieldId(), newType); + if (newType != null) { + fields.add(Types.Field.get(f.fieldId(), f.isOptional(), f.name(), newType, f.doc())); + } + } + if (fields.isEmpty()) { + throw new UnsupportedOperationException("cannot support delete all columns from Struct"); + } + return Types.RecordType.get(fields); + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + Type newElementType = applyTableChange2Type(array.elementType(), deletes); + newElementType = deletes.applyDelete(array.elementId(), newElementType); + if (newElementType == null) { + throw new IllegalArgumentException(String.format("cannot delete element from arrayType: %s", array)); + } + return Types.ArrayType.get(array.elementId(), array.isElementOptional(), newElementType); + case MAP: + Types.MapType map = (Types.MapType) type; + int keyId = map.fields().get(0).fieldId(); + if (deletes.getDeletes().contains(keyId)) { + throw new IllegalArgumentException(String.format("cannot delete key from mapType: %s", map)); + } + Type newValueType = 
applyTableChange2Type(map.valueType(), deletes); + newValueType = deletes.applyDelete(map.valueId(), newValueType); + if (newValueType == null) { + throw new IllegalArgumentException(String.format("cannot delete value from mapType: %s", map)); + } + return Types.MapType.get(map.keyId(), map.valueId(), map.keyType(), newValueType, map.isValueOptional()); + default: + return type; + } + } + + /** + * Apply all the DDL update operations to internalSchema to produce a new internalSchema. + * + * @param internalSchema origin internalSchema. + * @param updates a wrapper class for all the DDL update operations. + * @return a new internalSchema. + */ + public static InternalSchema applyTableChanges2Schema(InternalSchema internalSchema, TableChanges.ColumnUpdateChange updates) { + Types.RecordType newType = (Types.RecordType)applyTableChange2Type(internalSchema.getRecord(), updates); + // deal with root level changes + List newFields = TableChangesHelper.applyAddChange2Fields(newType.fields(), + new ArrayList<>(), updates.getPositionChangeMap().get(-1)); + return new InternalSchema(newFields); + } + + /** + * Apply all the DDL update operations to type to produce a new internalSchema. + * do not call this method directly. expose this method only for UT. + * + * @param type origin internalSchema. + * @param updates a wrapper class for all the DDL update operations. + * @return a new internalSchema. 
+ */ + private static Type applyTableChange2Type(Type type, TableChanges.ColumnUpdateChange updates) { + switch (type.typeId()) { + case RECORD: + Types.RecordType record = (Types.RecordType) type; + List newTypes = new ArrayList<>(); + for (Types.Field f : record.fields()) { + Type newType = applyTableChange2Type(f.type(), updates); + newTypes.add(updates.applyUpdates(f, newType)); + } + List newFields = new ArrayList<>(); + for (int i = 0; i < newTypes.size(); i++) { + Type newType = newTypes.get(i); + Types.Field oldField = record.fields().get(i); + Types.Field updateField = updates.getUpdates().get(oldField.fieldId()); + if (updateField != null) { + newFields.add(Types.Field.get(oldField.fieldId(), updateField.isOptional(), updateField.name(), newType, updateField.doc())); + } else if (!oldField.type().equals(newType)) { + newFields.add(Types.Field.get(oldField.fieldId(), oldField.isOptional(), oldField.name(), newType, oldField.doc())); + } else { + newFields.add(oldField); + } + } + return Types.RecordType.get(newFields); + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + Type newElementType; + Types.Field elementField = array.fields().get(0); + newElementType = applyTableChange2Type(array.elementType(), updates); + newElementType = updates.applyUpdates(elementField, newElementType); + Types.Field elementUpdate = updates.getUpdates().get(elementField.fieldId()); + boolean optional = elementUpdate == null ? 
array.isElementOptional() : elementUpdate.isOptional(); + if (optional == elementField.isOptional() && array.elementType() == newElementType) { + return array; + } + return Types.ArrayType.get(array.elementId(), optional, newElementType); + case MAP: + Types.MapType map = (Types.MapType) type; + Types.Field valueFiled = map.fields().get(1); + Type newValueType; + newValueType = applyTableChange2Type(map.valueType(), updates); + newValueType = updates.applyUpdates(valueFiled, newValueType); + Types.Field valueUpdate = updates.getUpdates().get(valueFiled.fieldId()); + boolean valueOptional = valueUpdate == null ? map.isValueOptional() : valueUpdate.isOptional(); + if (valueOptional == map.isValueOptional() && map.valueType() == newValueType) { + return map; + } + return Types.MapType.get(map.keyId(), map.valueId(), map.keyType(), newValueType, valueOptional); + default: + return type; + } + } +} + diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SerDeHelper.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SerDeHelper.java new file mode 100644 index 0000000000000..fe6174057bbdc --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SerDeHelper.java @@ -0,0 +1,351 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.utils; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Type; +import org.apache.hudi.internal.schema.Types; + +import java.io.IOException; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class SerDeHelper { + private SerDeHelper() { + + } + + public static final String LATEST_SCHEMA = "latest_schema"; + public static final String SCHEMAS = "schemas"; + private static final String MAX_COLUMN_ID = "max_column_id"; + private static final String VERSION_ID = "version_id"; + private static final String TYPE = "type"; + private static final String RECORD = "record"; + private static final String ARRAY = "array"; + private static final String MAP = "map"; + private static final String FIELDS = "fields"; + private static final String ELEMENT = "element"; + private static final String KEY = "key"; + private static final String VALUE = "value"; + private static final String DOC = "doc"; + private static final String NAME = "name"; + private static final String ID = "id"; + private static final String ELEMENT_ID = "element_id"; + private static final String KEY_ID = "key_id"; + private static final String VALUE_ID = "value_id"; + private static final String OPTIONAL = "optional"; + private static 
final String ELEMENT_OPTIONAL = "element_optional"; + private static final String VALUE_OPTIONAL = "value_optional"; + + private static final Pattern FIXED = Pattern.compile("fixed\\[(\\d+)\\]"); + private static final Pattern DECIMAL = Pattern.compile("decimal\\((\\d+),\\s+(\\d+)\\)"); + + /** + * Convert history internalSchemas to json. + * this is used when save history schemas into hudi. + * + * @param internalSchemas history internal schemas + * @return a string + */ + public static String toJson(List internalSchemas) { + try { + StringWriter writer = new StringWriter(); + JsonGenerator generator = (new JsonFactory()).createGenerator(writer); + generator.writeStartObject(); + generator.writeArrayFieldStart(SCHEMAS); + for (InternalSchema schema : internalSchemas) { + toJson(schema, generator); + } + generator.writeEndArray(); + generator.writeEndObject(); + generator.flush(); + return writer.toString(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Convert internalSchemas to json. 
+ * + * @param internalSchema a internal schema + * @return a string + */ + public static String toJson(InternalSchema internalSchema) { + if (internalSchema == null || internalSchema.isEmptySchema()) { + return ""; + } + try { + StringWriter writer = new StringWriter(); + JsonGenerator generator = (new JsonFactory()).createGenerator(writer); + toJson(internalSchema, generator); + generator.flush(); + return writer.toString(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static void toJson(InternalSchema internalSchema, JsonGenerator generator) throws IOException { + toJson(internalSchema.getRecord(), internalSchema.getMaxColumnId(), internalSchema.schemaId(), generator); + } + + private static void toJson(Types.RecordType record, Integer maxColumnId, Long versionId, JsonGenerator generator) throws IOException { + generator.writeStartObject(); + if (maxColumnId != null) { + generator.writeNumberField(MAX_COLUMN_ID, maxColumnId); + } + if (versionId != null) { + generator.writeNumberField(VERSION_ID, versionId); + } + generator.writeStringField(TYPE, RECORD); + generator.writeArrayFieldStart(FIELDS); + for (Types.Field field : record.fields()) { + generator.writeStartObject(); + generator.writeNumberField(ID, field.fieldId()); + generator.writeStringField(NAME, field.name()); + generator.writeBooleanField(OPTIONAL, field.isOptional()); + generator.writeFieldName(TYPE); + toJson(field.type(), generator); + if (field.doc() != null) { + generator.writeStringField(DOC, field.doc()); + } + generator.writeEndObject(); + } + generator.writeEndArray(); + generator.writeEndObject(); + } + + private static void toJson(Type type, JsonGenerator generator) throws IOException { + switch (type.typeId()) { + case RECORD: + toJson((Types.RecordType) type, null, null, generator); + break; + case ARRAY: + Types.ArrayType array = (Types.ArrayType) type; + generator.writeStartObject(); + generator.writeStringField(TYPE, ARRAY); + 
generator.writeNumberField(ELEMENT_ID, array.elementId()); + generator.writeFieldName(ELEMENT); + toJson(array.elementType(), generator); + generator.writeBooleanField(ELEMENT_OPTIONAL, array.isElementOptional()); + generator.writeEndObject(); + break; + case MAP: + Types.MapType map = (Types.MapType) type; + generator.writeStartObject(); + generator.writeStringField(TYPE, MAP); + generator.writeNumberField(KEY_ID, map.keyId()); + generator.writeFieldName(KEY); + toJson(map.keyType(), generator); + generator.writeNumberField(VALUE_ID, map.valueId()); + generator.writeFieldName(VALUE); + toJson(map.valueType(), generator); + generator.writeBooleanField(VALUE_OPTIONAL, map.isValueOptional()); + generator.writeEndObject(); + break; + default: + if (!type.isNestedType()) { + generator.writeString(type.toString()); + } else { + throw new IllegalArgumentIOException(String.format("cannot write unknown types: %s", type)); + } + } + } + + private static Type parserTypeFromJson(JsonNode jsonNode) { + if (jsonNode.isTextual()) { + String type = jsonNode.asText().toLowerCase(Locale.ROOT); + // deal with fixed and decimal + Matcher fixed = FIXED.matcher(type); + if (fixed.matches()) { + return Types.FixedType.getFixed(Integer.parseInt(fixed.group(1))); + } + Matcher decimal = DECIMAL.matcher(type); + if (decimal.matches()) { + return Types.DecimalType.get( + Integer.parseInt(decimal.group(1)), + Integer.parseInt(decimal.group(2))); + } + // deal with other type + switch (Type.fromValue(type)) { + case BOOLEAN: + return Types.BooleanType.get(); + case INT: + return Types.IntType.get(); + case LONG: + return Types.LongType.get(); + case FLOAT: + return Types.FloatType.get(); + case DOUBLE: + return Types.DoubleType.get(); + case DATE: + return Types.DateType.get(); + case TIME: + return Types.TimeType.get(); + case TIMESTAMP: + return Types.TimestampType.get(); + case STRING: + return Types.StringType.get(); + case UUID: + return Types.UUIDType.get(); + case BINARY: + return 
Types.BinaryType.get(); + default: + throw new IllegalArgumentException("cannot parser types from jsonNode"); + } + } else if (jsonNode.isObject()) { + String typeStr = jsonNode.get(TYPE).asText(); + if (RECORD.equals(typeStr)) { + JsonNode fieldNodes = jsonNode.get(FIELDS); + Iterator iter = fieldNodes.elements(); + List fields = new ArrayList<>(); + while (iter.hasNext()) { + JsonNode field = iter.next(); + // extract + int id = field.get(ID).asInt(); + String name = field.get(NAME).asText(); + Type type = parserTypeFromJson(field.get(TYPE)); + String doc = field.has(DOC) ? field.get(DOC).asText() : null; + boolean optional = field.get(OPTIONAL).asBoolean(); + // build fields + fields.add(Types.Field.get(id, optional, name, type, doc)); + } + return Types.RecordType.get(fields); + } else if (ARRAY.equals(typeStr)) { + int elementId = jsonNode.get(ELEMENT_ID).asInt(); + Type elementType = parserTypeFromJson(jsonNode.get(ELEMENT)); + boolean optional = jsonNode.get(ELEMENT_OPTIONAL).asBoolean(); + return Types.ArrayType.get(elementId, optional, elementType); + } else if (MAP.equals(typeStr)) { + int keyId = jsonNode.get(KEY_ID).asInt(); + Type keyType = parserTypeFromJson(jsonNode.get(KEY)); + int valueId = jsonNode.get(VALUE_ID).asInt(); + Type valueType = parserTypeFromJson(jsonNode.get(VALUE)); + boolean optional = jsonNode.get(VALUE_OPTIONAL).asBoolean(); + return Types.MapType.get(keyId, valueId, keyType, valueType, optional); + } + } + throw new IllegalArgumentException(String.format("cannot parse type from jsonNode: %s", jsonNode)); + } + + /** + * Convert jsonNode to internalSchema. + * + * @param jsonNode a jsonNode. + * @return a internalSchema. + */ + public static InternalSchema fromJson(JsonNode jsonNode) { + Integer maxColumnId = !jsonNode.has(MAX_COLUMN_ID) ? null : jsonNode.get(MAX_COLUMN_ID).asInt(); + Long versionId = !jsonNode.has(VERSION_ID) ? 
null : jsonNode.get(VERSION_ID).asLong(); + Types.RecordType type = (Types.RecordType)parserTypeFromJson(jsonNode); + if (versionId == null) { + return new InternalSchema(type.fields()); + } else { + if (maxColumnId != null) { + return new InternalSchema(versionId, maxColumnId, type.fields()); + } else { + return new InternalSchema(versionId, type.fields()); + } + } + } + + /** + * Convert string to internalSchema. + * + * @param json a json string. + * @return a internalSchema. + */ + public static Option fromJson(String json) { + if (json == null || json.isEmpty()) { + return Option.empty(); + } + try { + return Option.of(fromJson((new ObjectMapper(new JsonFactory())).readValue(json, JsonNode.class))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Convert json string to history internalSchemas. + * TreeMap is used to hold history internalSchemas. + * + * @param json a json string + * @return a TreeMap + */ + public static TreeMap parseSchemas(String json) { + TreeMap result = new TreeMap<>(); + try { + JsonNode jsonNode = (new ObjectMapper(new JsonFactory())).readValue(json, JsonNode.class); + if (!jsonNode.has(SCHEMAS)) { + throw new IllegalArgumentException(String.format("cannot parser schemas from current json string, missing key name: %s", SCHEMAS)); + } + JsonNode schemas = jsonNode.get(SCHEMAS); + Iterator iter = schemas.elements(); + while (iter.hasNext()) { + JsonNode schema = iter.next(); + InternalSchema current = fromJson(schema); + result.put(current.schemaId(), current); + } + } catch (IOException e) { + throw new HoodieException(e); + } + return result; + } + + /** + * Add the new schema to the historical schemas. + * use string operations to reduce overhead. + * + * @param newSchema a new internalSchema + * @param oldSchemas historical schemas string. + * @return a string. 
+ */ + public static String inheritSchemas(InternalSchema newSchema, String oldSchemas) { + if (newSchema == null) { + return ""; + } + if (oldSchemas == null || oldSchemas.isEmpty()) { + return toJson(Arrays.asList(newSchema)); + } + String checkedString = "{\"schemas\":["; + if (!oldSchemas.startsWith("{\"schemas\":")) { + return ""; + } + String oldSchemasSuffix = oldSchemas.substring(checkedString.length()); + return checkedString + toJson(newSchema) + "," + oldSchemasSuffix; + } +} + diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/visitor/InternalSchemaVisitor.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/visitor/InternalSchemaVisitor.java new file mode 100644 index 0000000000000..79a9410c65555 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/visitor/InternalSchemaVisitor.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.visitor; + +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Type; +import org.apache.hudi.internal.schema.Types; + +import java.util.List; + +/** + * Base class of schema visitor. 
+ */ +public abstract class InternalSchemaVisitor { + + public void beforeField(Types.Field field) { + } + + public void afterField(Types.Field field) { + } + + public void beforeArrayElement(Types.Field elementField) { + beforeField(elementField); + } + + public void afterArrayElement(Types.Field elementField) { + afterField(elementField); + } + + public void beforeMapKey(Types.Field keyField) { + beforeField(keyField); + } + + public void afterMapKey(Types.Field keyField) { + afterField(keyField); + } + + public void beforeMapValue(Types.Field valueField) { + beforeField(valueField); + } + + public void afterMapValue(Types.Field valueField) { + afterField(valueField); + } + + public T schema(InternalSchema schema, T recordResult) { + return null; + } + + public T record(Types.RecordType record, List fieldResults) { + return null; + } + + public T field(Types.Field field, T fieldResult) { + return null; + } + + public T array(Types.ArrayType array, T elementResult) { + return null; + } + + public T map(Types.MapType map, T keyResult, T valueResult) { + return null; + } + + public T primitive(Type.PrimitiveType primitive) { + return null; + } +} + diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/visitor/NameToIDVisitor.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/visitor/NameToIDVisitor.java new file mode 100644 index 0000000000000..4960f434eeb23 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/visitor/NameToIDVisitor.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.visitor; + +import static org.apache.hudi.internal.schema.utils.InternalSchemaUtils.createFullName; + +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Type; +import org.apache.hudi.internal.schema.Types; + +import java.util.Deque; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +/** + * Schema visitor to produce name -> id map for internalSchema. + */ +public class NameToIDVisitor extends InternalSchemaVisitor> { + private final Deque fieldNames = new LinkedList<>(); + private final Map nameToId = new HashMap<>(); + + @Override + public void beforeField(Types.Field field) { + fieldNames.push(field.name()); + } + + @Override + public void afterField(Types.Field field) { + fieldNames.pop(); + } + + @Override + public void beforeArrayElement(Types.Field elementField) { + fieldNames.push(elementField.name()); + } + + @Override + public void afterArrayElement(Types.Field elementField) { + fieldNames.pop(); + } + + @Override + public void beforeMapKey(Types.Field keyField) { + fieldNames.push(keyField.name()); + } + + @Override + public void afterMapKey(Types.Field keyField) { + fieldNames.pop(); + } + + @Override + public void beforeMapValue(Types.Field valueField) { + fieldNames.push(valueField.name()); + } + + @Override + public void afterMapValue(Types.Field valueField) { + fieldNames.pop(); + } + + @Override + public Map schema(InternalSchema schema, Map recordResult) { + return nameToId; + } + + 
@Override + public Map record(Types.RecordType record, List> fieldResults) { + return nameToId; + } + + @Override + public Map field(Types.Field field, Map fieldResult) { + nameToId.put(createFullName(field.name(), fieldNames), field.fieldId()); + return nameToId; + } + + @Override + public Map array(Types.ArrayType array, Map elementResult) { + nameToId.put(createFullName("element", fieldNames), array.elementId()); + return nameToId; + } + + @Override + public Map map(Types.MapType map, Map keyResult, Map valueResult) { + nameToId.put(createFullName("key", fieldNames), map.keyId()); + nameToId.put(createFullName("value", fieldNames), map.valueId()); + return nameToId; + } + + @Override + public Map primitive(Type.PrimitiveType primitive) { + return nameToId; + } +} \ No newline at end of file diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java index cb330b81432bf..6490425c42b75 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java @@ -18,32 +18,28 @@ package org.apache.hudi.io.storage; -import java.io.IOException; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; - import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hudi.common.bloom.BloomFilter; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; -public interface HoodieFileReader extends AutoCloseable { +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.List; +import java.util.Set; - public String[] readMinMaxRecordKeys(); +public interface HoodieFileReader extends AutoCloseable { - public BloomFilter readBloomFilter(); + String[] readMinMaxRecordKeys(); - public Set filterRowKeys(Set candidateRowKeys); + 
BloomFilter readBloomFilter(); - default Map getRecordsByKeys(List rowKeys) throws IOException { - throw new UnsupportedOperationException(); - } + Set filterRowKeys(Set candidateRowKeys); - public Iterator getRecordIterator(Schema readerSchema) throws IOException; + ClosableIterator getRecordIterator(Schema readerSchema) throws IOException; - default Iterator getRecordIterator() throws IOException { + default ClosableIterator getRecordIterator() throws IOException { return getRecordIterator(getSchema()); } @@ -55,6 +51,22 @@ default Option getRecordByKey(String key) throws IOException { return getRecordByKey(key, getSchema()); } + default ClosableIterator getRecordsByKeysIterator(List keys, Schema schema) throws IOException { + throw new UnsupportedOperationException(); + } + + default ClosableIterator getRecordsByKeysIterator(List keys) throws IOException { + return getRecordsByKeysIterator(keys, getSchema()); + } + + default ClosableIterator getRecordsByKeyPrefixIterator(List keyPrefixes, Schema schema) throws IOException { + throw new UnsupportedEncodingException(); + } + + default ClosableIterator getRecordsByKeyPrefixIterator(List keyPrefixes) throws IOException { + return getRecordsByKeyPrefixIterator(keyPrefixes, getSchema()); + } + Schema getSchema(); void close(); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java index 2d4d96959e150..aaf1dcd7037b7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java @@ -19,11 +19,11 @@ package org.apache.hudi.io.storage; -import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.CellComparatorImpl; /** * This class is explicitly used as Key Comparator to work around the hard coded * legacy format class names inside HBase. 
Otherwise, we will face issues with shading. */ -public class HoodieHBaseKVComparator extends KeyValue.KVComparator { +public class HoodieHBaseKVComparator extends CellComparatorImpl { } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java index 371da7675e992..899c2475da26c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java @@ -18,134 +18,132 @@ package org.apache.hudi.io.storage; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; -import java.util.stream.Collectors; - import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PositionedReadable; import org.apache.hadoop.fs.Seekable; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileInfo; import org.apache.hadoop.hbase.io.hfile.HFileScanner; -import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; import org.apache.hudi.common.fs.FSUtils; import 
org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.util.Lazy; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.util.CollectionUtils.toStream; +import static org.apache.hudi.common.util.ValidationUtils.checkState; + +/** + * NOTE: PLEASE READ DOCS & COMMENTS CAREFULLY BEFORE MAKING CHANGES + *

    + * {@link HoodieFileReader} implementation allowing to read from {@link HFile}. + */ public class HoodieHFileReader implements HoodieFileReader { - private static final Logger LOG = LogManager.getLogger(HoodieHFileReader.class); - private Path path; - private Configuration conf; - private HFile.Reader reader; - private FSDataInputStream fsDataInputStream; - private Schema schema; - // Scanner used to read individual keys. This is cached to prevent the overhead of opening the scanner for each - // key retrieval. - private HFileScanner keyScanner; - public static final String KEY_FIELD_NAME = "key"; - public static final String KEY_SCHEMA = "schema"; + // TODO HoodieHFileReader right now tightly coupled to MT, we should break that coupling + public static final String SCHEMA_KEY = "schema"; public static final String KEY_BLOOM_FILTER_META_BLOCK = "bloomFilter"; public static final String KEY_BLOOM_FILTER_TYPE_CODE = "bloomFilterTypeCode"; + + public static final String KEY_FIELD_NAME = "key"; public static final String KEY_MIN_RECORD = "minRecordKey"; public static final String KEY_MAX_RECORD = "maxRecordKey"; - public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig) throws IOException { - this.conf = configuration; - this.path = path; - this.reader = HFile.createReader(FSUtils.getFs(path.toString(), configuration), path, cacheConfig, conf); - } + private static final Logger LOG = LogManager.getLogger(HoodieHFileReader.class); - public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException { - this.conf = configuration; - this.path = path; - this.fsDataInputStream = fs.open(path); - this.reader = HFile.createReader(fs, path, cacheConfig, configuration); - } + private final Path path; + + private final Lazy schema; - public HoodieHFileReader(byte[] content) throws IOException { - Configuration conf = new Configuration(); - Path path = new Path("hoodie"); - 
SeekableByteArrayInputStream bis = new SeekableByteArrayInputStream(content); - FSDataInputStream fsdis = new FSDataInputStream(bis); - this.reader = HFile.createReader(FSUtils.getFs("hoodie", conf), path, new FSDataInputStreamWrapper(fsdis), - content.length, new CacheConfig(conf), conf); + // NOTE: Reader is ONLY THREAD-SAFE for {@code Scanner} operating in Positional Read ("pread") + // mode (ie created w/ "pread = true") + private final HFile.Reader reader; + // NOTE: Scanner caches read blocks, therefore it's important to re-use scanner + // wherever possible + private final HFileScanner sharedScanner; + + private final Object sharedScannerLock = new Object(); + + public HoodieHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig) throws IOException { + this(path, + HoodieHFileUtils.createHFileReader(FSUtils.getFs(path.toString(), hadoopConf), path, cacheConfig, hadoopConf), + Option.empty()); } - @Override - public String[] readMinMaxRecordKeys() { - try { - Map fileInfo = reader.loadFileInfo(); - return new String[] { new String(fileInfo.get(KEY_MIN_RECORD.getBytes())), - new String(fileInfo.get(KEY_MAX_RECORD.getBytes()))}; - } catch (IOException e) { - throw new HoodieException("Could not read min/max record key out of file information block correctly from path", e); - } + public HoodieHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException { + this(path, HoodieHFileUtils.createHFileReader(fs, path, cacheConfig, hadoopConf), Option.empty()); } - @Override - public Schema getSchema() { - if (schema == null) { - try { - Map fileInfo = reader.loadFileInfo(); - schema = new Schema.Parser().parse(new String(fileInfo.get(KEY_SCHEMA.getBytes()))); - } catch (IOException e) { - throw new HoodieException("Could not read schema of file from path", e); - } - } + public HoodieHFileReader(FileSystem fs, Path dummyPath, byte[] content, Option schemaOpt) throws IOException { + this(null, 
HoodieHFileUtils.createHFileReader(fs, dummyPath, content), schemaOpt); + } - return schema; + public HoodieHFileReader(Path path, HFile.Reader reader, Option schemaOpt) throws IOException { + this.path = path; + this.reader = reader; + // For shared scanner, which is primarily used for point-lookups, we're caching blocks + // by default, to minimize amount of traffic to the underlying storage + this.sharedScanner = getHFileScanner(reader, true); + this.schema = schemaOpt.map(Lazy::eagerly) + .orElseGet(() -> Lazy.lazily(() -> fetchSchema(reader))); } - /** - * Sets up the writer schema explicitly. - */ - public void withSchema(Schema schema) { - this.schema = schema; + @Override + public String[] readMinMaxRecordKeys() { + // NOTE: This access to reader is thread-safe + HFileInfo fileInfo = reader.getHFileInfo(); + return new String[]{new String(fileInfo.get(KEY_MIN_RECORD.getBytes())), + new String(fileInfo.get(KEY_MAX_RECORD.getBytes()))}; } @Override public BloomFilter readBloomFilter() { - Map fileInfo; try { - fileInfo = reader.loadFileInfo(); - ByteBuffer serializedFilter = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK, false); - byte[] filterBytes = new byte[serializedFilter.remaining()]; - serializedFilter.get(filterBytes); // read the bytes that were written - return BloomFilterFactory.fromString(new String(filterBytes), + // NOTE: This access to reader is thread-safe + HFileInfo fileInfo = reader.getHFileInfo(); + ByteBuff buf = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK, false).getBufferWithoutHeader(); + // We have to copy bytes here, since we can't reuse buffer's underlying + // array as is, since it contains additional metadata (header) + byte[] bytes = new byte[buf.remaining()]; + buf.get(bytes); + return BloomFilterFactory.fromString(new String(bytes), new String(fileInfo.get(KEY_BLOOM_FILTER_TYPE_CODE.getBytes()))); } catch (IOException e) { throw new HoodieException("Could not read bloom filter from " + path, e); } } + @Override + public 
Schema getSchema() { + return schema.get(); + } + /** * Filter keys by availability. *

    @@ -156,291 +154,420 @@ public BloomFilter readBloomFilter() { */ @Override public Set filterRowKeys(Set candidateRowKeys) { - return candidateRowKeys.stream().filter(k -> { - try { - return isKeyAvailable(k); - } catch (IOException e) { - LOG.error("Failed to check key availability: " + k); - return false; - } - }).collect(Collectors.toSet()); - } + checkState(candidateRowKeys instanceof TreeSet, + String.format("HFile reader expects a TreeSet as iterating over ordered keys is more performant, got (%s)", candidateRowKeys.getClass().getSimpleName())); - @Override - public Map getRecordsByKeys(List rowKeys) throws IOException { - return filterRecordsImpl(new TreeSet<>(rowKeys)); + synchronized (sharedScannerLock) { + return candidateRowKeys.stream().filter(k -> { + try { + return isKeyAvailable(k, sharedScanner); + } catch (IOException e) { + LOG.error("Failed to check key availability: " + k); + return false; + } + }).collect(Collectors.toSet()); + } } - /** - * Filter records by sorted keys. - *

    - * TODO: Implement single seek and sequential scan till the last candidate key - * instead of repeated seeks. - * - * @param sortedCandidateRowKeys - Sorted set of keys to fetch records for - * @return Map of keys to fetched records - * @throws IOException When the deserialization of records fail - */ - private synchronized Map filterRecordsImpl(TreeSet sortedCandidateRowKeys) throws IOException { - HashMap filteredRecords = new HashMap<>(); - for (String key : sortedCandidateRowKeys) { - Option record = getRecordByKey(key); - if (record.isPresent()) { - filteredRecords.put(key, record.get()); - } + @SuppressWarnings("unchecked") + @Override + public Option getRecordByKey(String key, Schema readerSchema) throws IOException { + synchronized (sharedScannerLock) { + return (Option) fetchRecordByKeyInternal(sharedScanner, key, getSchema(), readerSchema); } - return filteredRecords; } - /** - * Reads all the records with given schema. - * - *

    NOTE: This should only be used for testing, - * the records are materialized eagerly into a list and returned, - * use {@code getRecordIterator} where possible. - */ - private List> readAllRecords(Schema writerSchema, Schema readerSchema) { - final Option keyFieldSchema = Option.ofNullable(readerSchema.getField(KEY_FIELD_NAME)); - List> recordList = new LinkedList<>(); - try { - final HFileScanner scanner = reader.getScanner(false, false); - if (scanner.seekTo()) { - do { - Cell c = scanner.getKeyValue(); - final Pair keyAndRecordPair = getRecordFromCell(c, writerSchema, readerSchema, keyFieldSchema); - recordList.add(keyAndRecordPair); - } while (scanner.next()); - } + @SuppressWarnings("unchecked") + @Override + public ClosableIterator getRecordIterator(Schema readerSchema) throws IOException { + // TODO eval whether seeking scanner would be faster than pread + HFileScanner scanner = getHFileScanner(reader, false); + return (ClosableIterator) new RecordIterator(scanner, getSchema(), readerSchema); + } - return recordList; - } catch (IOException e) { - throw new HoodieException("Error reading hfile " + path + " as a dataframe", e); - } + @SuppressWarnings("unchecked") + @Override + public ClosableIterator getRecordsByKeysIterator(List keys, Schema readerSchema) throws IOException { + // We're caching blocks for this scanner to minimize amount of traffic + // to the underlying storage as we fetched (potentially) sparsely distributed + // keys + HFileScanner scanner = getHFileScanner(reader, true); + return (ClosableIterator) new RecordByKeyIterator(scanner, keys, getSchema(), readerSchema); } - /** - * Reads all the records with current schema. - * - *

    NOTE: This should only be used for testing, - * the records are materialized eagerly into a list and returned, - * use {@code getRecordIterator} where possible. - */ - public List> readAllRecords() { - Schema schema = getSchema(); - return readAllRecords(schema, schema); + @SuppressWarnings("unchecked") + @Override + public ClosableIterator getRecordsByKeyPrefixIterator(List keyPrefixes, Schema readerSchema) throws IOException { + // We're caching blocks for this scanner to minimize amount of traffic + // to the underlying storage as we fetched (potentially) sparsely distributed + // keys + HFileScanner scanner = getHFileScanner(reader, true); + return (ClosableIterator) new RecordByKeyPrefixIterator(scanner, keyPrefixes, getSchema(), readerSchema); } - /** - * Reads all the records with current schema and filtering keys. - * - *

    NOTE: This should only be used for testing, - * the records are materialized eagerly into a list and returned, - * use {@code getRecordIterator} where possible. - */ - public List> readRecords(List keys) throws IOException { - return readRecords(keys, getSchema()); + @Override + public long getTotalRecords() { + // NOTE: This access to reader is thread-safe + return reader.getEntries(); } - /** - * Reads all the records with given schema and filtering keys. - * - *

    NOTE: This should only be used for testing, - * the records are materialized eagerly into a list and returned, - * use {@code getRecordIterator} where possible. - */ - public List> readRecords(List keys, Schema schema) throws IOException { - this.schema = schema; - reader.loadFileInfo(); - List> records = new ArrayList<>(); - for (String key: keys) { - Option value = getRecordByKey(key, schema); - if (value.isPresent()) { - records.add(new Pair(key, value.get())); + @Override + public void close() { + try { + synchronized (this) { + reader.close(); } + } catch (IOException e) { + throw new HoodieIOException("Error closing the hfile reader", e); } - return records; } - public ClosableIterator getRecordIterator(List keys, Schema schema) throws IOException { - this.schema = schema; - reader.loadFileInfo(); - Iterator iterator = keys.iterator(); - return new ClosableIterator() { - private R next; - @Override - public void close() { + private boolean isKeyAvailable(String key, HFileScanner keyScanner) throws IOException { + final KeyValue kv = new KeyValue(key.getBytes(), null, null, null); + return keyScanner.seekTo(kv) == 0; + } + + private static Iterator getRecordByKeyPrefixIteratorInternal(HFileScanner scanner, + String keyPrefix, + Schema writerSchema, + Schema readerSchema) throws IOException { + KeyValue kv = new KeyValue(keyPrefix.getBytes(), null, null, null); + + // NOTE: HFile persists both keys/values as bytes, therefore lexicographical sorted is + // essentially employed + // + // For the HFile containing list of cells c[0], c[1], ..., c[N], `seekTo(cell)` would return + // following: + // a) -1, if cell < c[0], no position; + // b) 0, such that c[i] = cell and scanner is left in position i; + // c) and 1, such that c[i] < cell, and scanner is left in position i. 
+ // + // Consider entries w/ the following keys in HFile: [key01, key02, key03, key04,..., key20]; + // In case looked up key-prefix is + // - "key", `seekTo()` will return -1 and place the cursor just before "key01", + // `getCell()` will return "key01" entry + // - "key03", `seekTo()` will return 0 (exact match) and place the cursor just before "key03", + // `getCell()` will return "key03" entry + // - "key1", `seekTo()` will return 1 (first not lower than) and place the cursor just before + // "key10" (i.e. on "key09"); + // + int val = scanner.seekTo(kv); + if (val == 1) { + // Try moving to next entry, matching the prefix key; if we're at the EOF, + // `next()` will return false + if (!scanner.next()) { + return Collections.emptyIterator(); } + } + + class KeyPrefixIterator implements Iterator { + private GenericRecord next = null; + private boolean eof = false; @Override public boolean hasNext() { - try { - while (iterator.hasNext()) { - Option value = getRecordByKey(iterator.next(), schema); - if (value.isPresent()) { - next = value.get(); - return true; - } - } + if (next != null) { + return true; + } else if (eof) { + return false; + } + + Cell c = Objects.requireNonNull(scanner.getCell()); + byte[] keyBytes = copyKeyFromCell(c); + String key = new String(keyBytes); + // Check whether we're still reading records corresponding to the key-prefix + if (!key.startsWith(keyPrefix)) { return false; + } + + // Extract the byte value before releasing the lock since we cannot hold on to the returned cell afterwards + byte[] valueBytes = copyValueFromCell(c); + try { + next = deserialize(keyBytes, valueBytes, writerSchema, readerSchema); + // In case scanner is not able to advance, it means we reached EOF + eof = !scanner.next(); } catch (IOException e) { - throw new HoodieIOException("unable to read next record from hfile ", e); + throw new HoodieIOException("Failed to deserialize payload", e); } + + return true; } @Override - public R next() { + public 
GenericRecord next() { + GenericRecord next = this.next; + this.next = null; return next; } - }; + } + + return new KeyPrefixIterator(); } - @Override - public Iterator getRecordIterator(Schema readerSchema) throws IOException { - final HFileScanner scanner = reader.getScanner(false, false); - final Option keyFieldSchema = Option.ofNullable(readerSchema.getField(KEY_FIELD_NAME)); - ValidationUtils.checkState(keyFieldSchema != null, - "Missing key field '" + KEY_FIELD_NAME + "' in the schema!"); - return new Iterator() { - private R next = null; - private boolean eof = false; + private static Option fetchRecordByKeyInternal(HFileScanner scanner, String key, Schema writerSchema, Schema readerSchema) throws IOException { + KeyValue kv = new KeyValue(key.getBytes(), null, null, null); + if (scanner.seekTo(kv) != 0) { + return Option.empty(); + } - @Override - public boolean hasNext() { - try { - // To handle when hasNext() is called multiple times for idempotency and/or the first time - if (this.next == null && !this.eof) { - if (!scanner.isSeeked() && scanner.seekTo()) { - final Pair keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema); - this.next = keyAndRecordPair.getSecond(); - } - } - return this.next != null; - } catch (IOException io) { - throw new HoodieIOException("unable to read next record from hfile ", io); - } - } + Cell c = scanner.getCell(); + byte[] valueBytes = copyValueFromCell(c); + GenericRecord record = deserialize(key.getBytes(), valueBytes, writerSchema, readerSchema); - @Override - public R next() { - try { - // To handle case when next() is called before hasNext() - if (this.next == null) { - if (!hasNext()) { - throw new HoodieIOException("No more records left to read from hfile"); - } - } - R retVal = this.next; - if (scanner.next()) { - final Pair keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema); - this.next = 
keyAndRecordPair.getSecond(); - } else { - this.next = null; - this.eof = true; - } - return retVal; - } catch (IOException io) { - throw new HoodieIOException("unable to read next record from parquet file ", io); - } - } - }; + return Option.of(record); } - private boolean isKeyAvailable(String key) throws IOException { - final KeyValue kv = new KeyValue(key.getBytes(), null, null, null); - synchronized (this) { - if (keyScanner == null) { - keyScanner = reader.getScanner(false, false); - } - if (keyScanner.seekTo(kv) == 0) { - return true; - } + private static GenericRecord getRecordFromCell(Cell cell, Schema writerSchema, Schema readerSchema) throws IOException { + final byte[] keyBytes = copyKeyFromCell(cell); + final byte[] valueBytes = copyValueFromCell(cell); + return deserialize(keyBytes, valueBytes, writerSchema, readerSchema); + } + + private static GenericRecord deserializeUnchecked(final byte[] keyBytes, + final byte[] valueBytes, + Schema writerSchema, + Schema readerSchema) { + try { + return deserialize(keyBytes, valueBytes, writerSchema, readerSchema); + } catch (IOException e) { + throw new HoodieIOException("Failed to deserialize payload", e); } - return false; } - @Override - public Option getRecordByKey(String key, Schema readerSchema) throws IOException { - byte[] value = null; - final Option keyFieldSchema = Option.ofNullable(readerSchema.getField(KEY_FIELD_NAME)); - ValidationUtils.checkState(keyFieldSchema != null); - KeyValue kv = new KeyValue(key.getBytes(), null, null, null); + private static GenericRecord deserialize(final byte[] keyBytes, + final byte[] valueBytes, + Schema writerSchema, + Schema readerSchema) throws IOException { + GenericRecord record = HoodieAvroUtils.bytesToAvro(valueBytes, writerSchema, readerSchema); - synchronized (this) { - if (keyScanner == null) { - keyScanner = reader.getScanner(false, false); + getKeySchema(readerSchema).ifPresent(keyFieldSchema -> { + final Object keyObject = 
record.get(keyFieldSchema.pos()); + if (keyObject != null && keyObject.toString().isEmpty()) { + record.put(keyFieldSchema.pos(), new String(keyBytes)); } + }); - if (keyScanner.seekTo(kv) == 0) { - Cell c = keyScanner.getKeyValue(); - // Extract the byte value before releasing the lock since we cannot hold on to the returned cell afterwards - value = Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength()); - } - } + return record; + } - if (value != null) { - R record = deserialize(key.getBytes(), value, getSchema(), readerSchema, keyFieldSchema); - return Option.of(record); - } + private static Schema fetchSchema(HFile.Reader reader) { + HFileInfo fileInfo = reader.getHFileInfo(); + return new Schema.Parser().parse(new String(fileInfo.get(SCHEMA_KEY.getBytes()))); + } - return Option.empty(); + private static byte[] copyKeyFromCell(Cell cell) { + return Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); } - private Pair getRecordFromCell(Cell cell, Schema writerSchema, Schema readerSchema, Option keyFieldSchema) throws IOException { - final byte[] keyBytes = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); - final byte[] valueBytes = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength()); - R record = deserialize(keyBytes, valueBytes, writerSchema, readerSchema, keyFieldSchema); - return new Pair<>(new String(keyBytes), record); + private static byte[] copyValueFromCell(Cell c) { + return Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength()); } /** - * Deserialize the record byte array contents to record object. 
- * - * @param keyBytes - Record key as byte array - * @param valueBytes - Record content as byte array - * @param writerSchema - Writer schema - * @param readerSchema - Reader schema - * @param keyFieldSchema - Key field id in the schema - * @return Deserialized record object + * NOTE: THIS SHOULD ONLY BE USED FOR TESTING, RECORDS ARE MATERIALIZED EAGERLY + *

    + * Reads all the records with given schema */ - private R deserialize(final byte[] keyBytes, final byte[] valueBytes, Schema writerSchema, Schema readerSchema, - Option keyFieldSchema) throws IOException { - R record = (R) HoodieAvroUtils.bytesToAvro(valueBytes, writerSchema, readerSchema); - materializeRecordIfNeeded(keyBytes, record, keyFieldSchema); - return record; + public static List readAllRecords(HoodieHFileReader reader) throws IOException { + Schema schema = reader.getSchema(); + return toStream(reader.getRecordIterator(schema)) + .collect(Collectors.toList()); } /** - * Materialize the record for any missing fields, if needed. - * - * @param keyBytes - Key byte array - * @param record - Record object to materialize - * @param keyFieldSchema - Key field id in the schema + * NOTE: THIS SHOULD ONLY BE USED FOR TESTING, RECORDS ARE MATERIALIZED EAGERLY + *

    + * Reads all the records with given schema and filtering keys. */ - private void materializeRecordIfNeeded(final byte[] keyBytes, R record, Option keyFieldSchema) { - if (keyFieldSchema.isPresent()) { - final Object keyObject = record.get(keyFieldSchema.get().pos()); - if (keyObject != null && keyObject.toString().isEmpty()) { - record.put(keyFieldSchema.get().pos(), new String(keyBytes)); + public static List readRecords(HoodieHFileReader reader, + List keys) throws IOException { + return readRecords(reader, keys, reader.getSchema()); + } + + /** + * NOTE: THIS SHOULD ONLY BE USED FOR TESTING, RECORDS ARE MATERIALIZED EAGERLY + *

    + * Reads all the records with given schema and filtering keys. + */ + public static List readRecords(HoodieHFileReader reader, + List keys, + Schema schema) throws IOException { + Collections.sort(keys); + return toStream(reader.getRecordsByKeysIterator(keys, schema)) + .collect(Collectors.toList()); + } + + private static HFileScanner getHFileScanner(HFile.Reader reader, boolean cacheBlocks) { + // NOTE: Only scanners created in Positional Read ("pread") mode could share the same reader, + // since scanners in default mode will be seeking w/in the underlying stream + return reader.getScanner(cacheBlocks, true); + } + + private static Option getKeySchema(Schema schema) { + return Option.ofNullable(schema.getField(KEY_FIELD_NAME)); + } + + private static class RecordByKeyPrefixIterator implements ClosableIterator { + private final Iterator keyPrefixesIterator; + private Iterator recordsIterator; + + private final HFileScanner scanner; + + private final Schema writerSchema; + private final Schema readerSchema; + + private GenericRecord next = null; + + RecordByKeyPrefixIterator(HFileScanner scanner, List keyPrefixes, Schema writerSchema, Schema readerSchema) throws IOException { + this.keyPrefixesIterator = keyPrefixes.iterator(); + + this.scanner = scanner; + this.scanner.seekTo(); // position at the beginning of the file + + this.writerSchema = writerSchema; + this.readerSchema = readerSchema; + } + + @Override + public boolean hasNext() { + try { + while (true) { + // NOTE: This is required for idempotency + if (next != null) { + return true; + } else if (recordsIterator != null && recordsIterator.hasNext()) { + next = recordsIterator.next(); + return true; + } else if (keyPrefixesIterator.hasNext()) { + String currentKeyPrefix = keyPrefixesIterator.next(); + recordsIterator = + getRecordByKeyPrefixIteratorInternal(scanner, currentKeyPrefix, writerSchema, readerSchema); + } else { + return false; + } + } + } catch (IOException e) { + throw new 
HoodieIOException("Unable to read next record from HFile", e); } } + + @Override + public GenericRecord next() { + GenericRecord next = this.next; + this.next = null; + return next; + } + + @Override + public void close() { + scanner.close(); + } } - @Override - public long getTotalRecords() { - return reader.getEntries(); + private static class RecordByKeyIterator implements ClosableIterator { + private final Iterator keyIterator; + + private final HFileScanner scanner; + + private final Schema readerSchema; + private final Schema writerSchema; + + private GenericRecord next = null; + + RecordByKeyIterator(HFileScanner scanner, List keys, Schema writerSchema, Schema readerSchema) throws IOException { + this.keyIterator = keys.iterator(); + + this.scanner = scanner; + this.scanner.seekTo(); // position at the beginning of the file + + this.writerSchema = writerSchema; + this.readerSchema = readerSchema; + } + + @Override + public boolean hasNext() { + try { + // NOTE: This is required for idempotency + if (next != null) { + return true; + } + + while (keyIterator.hasNext()) { + Option value = fetchRecordByKeyInternal(scanner, keyIterator.next(), writerSchema, readerSchema); + if (value.isPresent()) { + next = value.get(); + return true; + } + } + return false; + } catch (IOException e) { + throw new HoodieIOException("unable to read next record from hfile ", e); + } + } + + @Override + public GenericRecord next() { + GenericRecord next = this.next; + this.next = null; + return next; + } + + @Override + public void close() { + scanner.close(); + } } - @Override - public synchronized void close() { - try { - reader.close(); - reader = null; - if (fsDataInputStream != null) { - fsDataInputStream.close(); + private static class RecordIterator implements ClosableIterator { + private final HFileScanner scanner; + + private final Schema writerSchema; + private final Schema readerSchema; + + private GenericRecord next = null; + + RecordIterator(HFileScanner scanner, Schema 
writerSchema, Schema readerSchema) { + this.scanner = scanner; + this.writerSchema = writerSchema; + this.readerSchema = readerSchema; + } + + @Override + public boolean hasNext() { + try { + // NOTE: This is required for idempotency + if (next != null) { + return true; + } + + boolean hasRecords; + if (!scanner.isSeeked()) { + hasRecords = scanner.seekTo(); + } else { + hasRecords = scanner.next(); + } + + if (!hasRecords) { + return false; + } + + this.next = getRecordFromCell(scanner.getCell(), writerSchema, readerSchema); + return true; + } catch (IOException io) { + throw new HoodieIOException("unable to read next record from hfile ", io); } - keyScanner = null; - } catch (IOException e) { - throw new HoodieIOException("Error closing the hfile reader", e); + } + + @Override + public GenericRecord next() { + GenericRecord next = this.next; + this.next = null; + return next; + } + + @Override + public void close() { + scanner.close(); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java new file mode 100644 index 0000000000000..3767ea1832579 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileInfo; +import org.apache.hadoop.hbase.io.hfile.ReaderContext; +import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder; + +import java.io.IOException; + +/** + * Util class for HFile reading and writing in Hudi + */ +public class HoodieHFileUtils { + // Based on HBase 2.4.9, the primaryReplicaReader is mainly used for constructing + // block cache key, so if we do not use block cache then it is OK to set it as any + // value. We use true here. + private static final boolean USE_PRIMARY_REPLICA_READER = true; + + /** + * Creates HFile reader for a file with default `primaryReplicaReader` as true. + * + * @param fs File system. + * @param path Path to file to read. + * @param cacheConfig Cache configuration. + * @param configuration Configuration + * @return HFile reader + * @throws IOException Upon error. + */ + public static HFile.Reader createHFileReader( + FileSystem fs, Path path, CacheConfig cacheConfig, Configuration configuration) throws IOException { + return HFile.createReader(fs, path, cacheConfig, USE_PRIMARY_REPLICA_READER, configuration); + } + + /** + * Creates HFile reader for byte array with default `primaryReplicaReader` as true. + * + * @param fs File system. + * @param dummyPath Dummy path to file to read. + * @param content Content in byte array. + * @return HFile reader + * @throws IOException Upon error. 
+ */ + public static HFile.Reader createHFileReader( + FileSystem fs, Path dummyPath, byte[] content) throws IOException { + Configuration conf = new Configuration(); + HoodieHFileReader.SeekableByteArrayInputStream bis = new HoodieHFileReader.SeekableByteArrayInputStream(content); + FSDataInputStream fsdis = new FSDataInputStream(bis); + FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fsdis); + ReaderContext context = new ReaderContextBuilder() + .withFilePath(dummyPath) + .withInputStreamWrapper(stream) + .withFileSize(content.length) + .withFileSystem(fs) + .withPrimaryReplicaReader(USE_PRIMARY_REPLICA_READER) + .withReaderType(ReaderContext.ReaderType.STREAM) + .build(); + HFileInfo fileInfo = new HFileInfo(context, conf); + HFile.Reader reader = HFile.createReader(context, fileInfo, new CacheConfig(conf), conf); + fileInfo.initMetaAndIndex(reader); + return reader; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcReader.java index 319f8d7da1add..5431bf3782af2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcReader.java @@ -18,9 +18,6 @@ package org.apache.hudi.io.storage; -import java.io.IOException; -import java.util.Iterator; -import java.util.Set; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; @@ -29,6 +26,7 @@ import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.util.AvroOrcUtils; import org.apache.hudi.common.util.BaseFileUtils; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.OrcReaderIterator; import org.apache.hudi.exception.HoodieIOException; import org.apache.orc.OrcFile; @@ -37,6 +35,9 @@ import org.apache.orc.RecordReader; import org.apache.orc.TypeDescription; +import 
java.io.IOException; +import java.util.Set; + public class HoodieOrcReader implements HoodieFileReader { private Path path; private Configuration conf; @@ -64,12 +65,12 @@ public Set filterRowKeys(Set candidateRowKeys) { } @Override - public Iterator getRecordIterator(Schema schema) throws IOException { + public ClosableIterator getRecordIterator(Schema schema) throws IOException { try { Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf)); TypeDescription orcSchema = AvroOrcUtils.createOrcSchema(schema); RecordReader recordReader = reader.rows(new Options(conf).schema(orcSchema)); - return new OrcReaderIterator(recordReader, schema, orcSchema); + return new OrcReaderIterator<>(recordReader, schema, orcSchema); } catch (IOException io) { throw new HoodieIOException("Unable to create an ORC reader.", io); } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java index 9ad07dfafbf60..804e4354c749e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java @@ -18,10 +18,6 @@ package org.apache.hudi.io.storage; -import java.io.IOException; -import java.util.Iterator; -import java.util.Set; - import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; @@ -29,15 +25,23 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.util.BaseFileUtils; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.ParquetReaderIterator; import org.apache.parquet.avro.AvroParquetReader; import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import 
java.util.Set; + public class HoodieParquetReader implements HoodieFileReader { + private final Path path; private final Configuration conf; private final BaseFileUtils parquetUtils; + private List readerIterators = new ArrayList<>(); public HoodieParquetReader(Configuration configuration, Path path) { this.conf = configuration; @@ -61,10 +65,12 @@ public Set filterRowKeys(Set candidateRowKeys) { } @Override - public Iterator getRecordIterator(Schema schema) throws IOException { + public ClosableIterator getRecordIterator(Schema schema) throws IOException { AvroReadSupport.setAvroReadSchema(conf, schema); ParquetReader reader = AvroParquetReader.builder(path).withConf(conf).build(); - return new ParquetReaderIterator<>(reader); + ParquetReaderIterator parquetReaderIterator = new ParquetReaderIterator<>(reader); + readerIterators.add(parquetReaderIterator); + return parquetReaderIterator; } @Override @@ -74,6 +80,7 @@ public Schema getSchema() { @Override public void close() { + readerIterators.forEach(ParquetReaderIterator::close); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java index 2dce66e700479..2036500ac6567 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java @@ -284,7 +284,7 @@ protected List fetchAllPartitionPaths() { List partitions = Collections.emptyList(); if (hoodieRecord.isPresent()) { - mayBeHandleSpuriousDeletes(hoodieRecord, "\"all partitions\""); + handleSpuriousDeletes(hoodieRecord, "\"all partitions\""); partitions = hoodieRecord.get().getData().getFilenames(); // Partition-less tables have a single empty partition if (partitions.contains(NON_PARTITIONED_NAME)) { @@ -315,7 +315,7 @@ FileStatus[] fetchAllFilesInPartition(Path partitionPath) throws IOException { FileStatus[] statuses = {}; if 
(hoodieRecord.isPresent()) { - mayBeHandleSpuriousDeletes(hoodieRecord, partitionName); + handleSpuriousDeletes(hoodieRecord, partitionName); statuses = hoodieRecord.get().getData().getFileStatuses(hadoopConf.get(), partitionPath); } @@ -350,7 +350,7 @@ Map fetchAllFilesInPartitionPaths(List partitionPath for (Pair>> entry: partitionsFileStatus) { if (entry.getValue().isPresent()) { - mayBeHandleSpuriousDeletes(entry.getValue(), entry.getKey()); + handleSpuriousDeletes(entry.getValue(), entry.getKey()); result.put(partitionInfo.get(entry.getKey()).toString(), entry.getValue().get().getData().getFileStatuses(hadoopConf.get(), partitionInfo.get(entry.getKey()))); } } @@ -360,11 +360,11 @@ Map fetchAllFilesInPartitionPaths(List partitionPath } /** - * Maybe handle spurious deletes. Depending on config, throw an exception or log a warn msg. + * Handle spurious deletes. Depending on config, throw an exception or log a warn msg. * @param hoodieRecord instance of {@link HoodieRecord} of interest. * @param partitionName partition name of interest. */ - private void mayBeHandleSpuriousDeletes(Option> hoodieRecord, String partitionName) { + private void handleSpuriousDeletes(Option> hoodieRecord, String partitionName) { if (!hoodieRecord.get().getData().getDeletions().isEmpty()) { if (metadataConfig.ignoreSpuriousDeletes()) { LOG.warn("Metadata record for " + partitionName + " encountered some files to be deleted which was not added before. " @@ -378,7 +378,7 @@ private void mayBeHandleSpuriousDeletes(Option> getRecordByKey(String key, String partitionName); - protected abstract List>>> getRecordsByKeys(List key, String partitionName); + public abstract List>>> getRecordsByKeys(List key, String partitionName); protected HoodieEngineContext getEngineContext() { return engineContext != null ? 
engineContext : new HoodieLocalEngineContext(hadoopConf.get()); diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java index 1bb18bad16e40..b77bb12c49447 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java @@ -21,9 +21,11 @@ import org.apache.hudi.avro.model.HoodieMetadataColumnStats; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.config.SerializableConfiguration; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodiePartitionMetadata; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; @@ -96,7 +98,7 @@ public List getAllPartitionPaths() throws IOException { } else if (!fileStatus.getPath().getName().equals(HoodieTableMetaClient.METAFOLDER_NAME)) { pathsToList.add(fileStatus.getPath()); } - } else if (fileStatus.getPath().getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) { + } else if (fileStatus.getPath().getName().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX)) { String partitionName = FSUtils.getRelativePartitionPath(new Path(datasetBasePath), fileStatus.getPath().getParent()); partitionPaths.add(partitionName); } @@ -159,4 +161,9 @@ public Map, HoodieMetadataColumnStats> getColumnStats(final throws HoodieMetadataException { throw new HoodieMetadataException("Unsupported operation: getColumnsStats!"); } + + @Override + public HoodieData> getRecordsByKeyPrefixes(List keyPrefixes, String partitionName) { + throw new 
HoodieMetadataException("Unsupported operation: getRecordsByKeyPrefixes!"); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java index 7b4dbd9a0b935..cf941bb70cc3b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java @@ -28,23 +28,24 @@ import org.apache.hudi.common.config.HoodieCommonConfig; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.SerializableConfiguration; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.function.SerializableFunction; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.SpillableMapUtils; -import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; @@ -59,14 +60,23 @@ import java.util.ArrayList; import 
java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty; +import static org.apache.hudi.common.util.CollectionUtils.toStream; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_FILES; + /** * Table metadata provided by an internal DFS backed Hudi metadata table. */ @@ -74,6 +84,8 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata { private static final Logger LOG = LogManager.getLogger(HoodieBackedTableMetadata.class); + private static final Schema METADATA_RECORD_SCHEMA = HoodieMetadataRecord.getClassSchema(); + private String metadataBasePath; // Metadata table's timeline and metaclient private HoodieTableMetaClient metadataMetaClient; @@ -130,28 +142,79 @@ protected Option> getRecordByKey(String key, } @Override - protected List>>> getRecordsByKeys(List keys, - String partitionName) { + public HoodieData> getRecordsByKeyPrefixes(List keyPrefixes, + String partitionName) { + // NOTE: Since we partition records to a particular file-group by full key, we will have + // to scan all file-groups for all key-prefixes as each of these might contain some + // records matching the key-prefix + List partitionFileSlices = + HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, partitionName); + + return engineContext.parallelize(partitionFileSlices) + .flatMap( + (SerializableFunction>>>>) fileSlice -> { + 
// NOTE: Since this will be executed by executors, we can't access previously cached + // readers, and therefore have to always open new ones + Pair readers = + openReaders(partitionName, fileSlice); + try { + List timings = new ArrayList<>(); + + HoodieFileReader baseFileReader = readers.getKey(); + HoodieMetadataMergedLogRecordReader logRecordScanner = readers.getRight(); + + if (baseFileReader == null && logRecordScanner == null) { + // TODO: what do we do if both does not exist? should we throw an exception and let caller do the fallback ? + return Collections.emptyIterator(); + } + + boolean fullKeys = false; + + Map>> logRecords = + readLogRecords(logRecordScanner, keyPrefixes, fullKeys, timings); + + List>>> mergedRecords = + readFromBaseAndMergeWithLogRecords(baseFileReader, keyPrefixes, fullKeys, logRecords, timings, partitionName); + + LOG.debug(String.format("Metadata read for %s keys took [baseFileRead, logMerge] %s ms", + keyPrefixes.size(), timings)); + + return mergedRecords.iterator(); + } catch (IOException ioe) { + throw new HoodieIOException("Error merging records from metadata table for " + keyPrefixes.size() + " key : ", ioe); + } finally { + closeReader(readers); + } + } + ) + .map(keyRecordPair -> keyRecordPair.getValue().orElse(null)) + .filter(Objects::nonNull); + } + + @Override + public List>>> getRecordsByKeys(List keys, + String partitionName) { Map, List> partitionFileSliceToKeysMap = getPartitionFileSliceToKeysMapping(partitionName, keys); List>>> result = new ArrayList<>(); AtomicInteger fileSlicesKeysCount = new AtomicInteger(); partitionFileSliceToKeysMap.forEach((partitionFileSlicePair, fileSliceKeys) -> { - Pair readers = openReadersIfNeeded(partitionName, - partitionFileSlicePair.getRight()); + Pair readers = + getOrCreateReaders(partitionName, partitionFileSlicePair.getRight()); try { List timings = new ArrayList<>(); HoodieFileReader baseFileReader = readers.getKey(); HoodieMetadataMergedLogRecordReader logRecordScanner = 
readers.getRight(); - if (baseFileReader == null && logRecordScanner == null) { return; } - // local map to assist in merging with base file records - Map>> logRecords = readLogRecords(logRecordScanner, - fileSliceKeys, timings); - result.addAll(readFromBaseAndMergeWithLogRecords(baseFileReader, fileSliceKeys, logRecords, + boolean fullKeys = true; + Map>> logRecords = + readLogRecords(logRecordScanner, fileSliceKeys, fullKeys, timings); + + result.addAll(readFromBaseAndMergeWithLogRecords(baseFileReader, fileSliceKeys, fullKeys, logRecords, timings, partitionName)); + LOG.debug(String.format("Metadata read for %s keys took [baseFileRead, logMerge] %s ms", fileSliceKeys.size(), timings)); fileSlicesKeysCount.addAndGet(fileSliceKeys.size()); @@ -164,86 +227,131 @@ protected List>>> getRec } }); - ValidationUtils.checkState(keys.size() == fileSlicesKeysCount.get()); return result; } private Map>> readLogRecords(HoodieMetadataMergedLogRecordReader logRecordScanner, - List keys, List timings) { + List keys, + boolean fullKey, + List timings) { HoodieTimer timer = new HoodieTimer().startTimer(); - Map>> logRecords = new HashMap<>(); - // Retrieve records from log file timer.startTimer(); - if (logRecordScanner != null) { - if (metadataConfig.enableFullScan()) { - // path which does full scan of log files - for (String key : keys) { - logRecords.put(key, logRecordScanner.getRecordByKey(key).get(0).getValue()); - } - } else { - // this path will do seeks pertaining to the keys passed in - List>>> logRecordsList = logRecordScanner.getRecordsByKeys(keys); - for (Pair>> entry : logRecordsList) { - logRecords.put(entry.getKey(), entry.getValue()); - } + + if (logRecordScanner == null) { + timings.add(timer.endTimer()); + return Collections.emptyMap(); + } + + String partitionName = logRecordScanner.getPartitionName().get(); + + Map>> logRecords = new HashMap<>(); + if (isFullScanAllowedForPartition(partitionName)) { + checkArgument(fullKey, "If full-scan is required, only 
full keys could be used!"); + // Path which does full scan of log files + for (String key : keys) { + logRecords.put(key, logRecordScanner.getRecordByKey(key).get(0).getValue()); } } else { - for (String key : keys) { - logRecords.put(key, Option.empty()); + // This path will do seeks pertaining to the keys passed in + List>>> logRecordsList = + fullKey ? logRecordScanner.getRecordsByKeys(keys) + : logRecordScanner.getRecordsByKeyPrefixes(keys) + .stream() + .map(record -> Pair.of(record.getRecordKey(), Option.of(record))) + .collect(Collectors.toList()); + + for (Pair>> entry : logRecordsList) { + logRecords.put(entry.getKey(), entry.getValue()); } } + timings.add(timer.endTimer()); return logRecords; } private List>>> readFromBaseAndMergeWithLogRecords(HoodieFileReader baseFileReader, - List keys, Map>> logRecords, List timings, String partitionName) throws IOException { - List>>> result = new ArrayList<>(); - // merge with base records + List keys, + boolean fullKeys, + Map>> logRecords, + List timings, + String partitionName) throws IOException { HoodieTimer timer = new HoodieTimer().startTimer(); timer.startTimer(); - HoodieRecord hoodieRecord = null; - // Retrieve record from base file - if (baseFileReader != null) { - HoodieTimer readTimer = new HoodieTimer(); - Map baseFileRecords = baseFileReader.getRecordsByKeys(keys); - for (String key : keys) { - readTimer.startTimer(); - if (baseFileRecords.containsKey(key)) { - hoodieRecord = getRecord(Option.of(baseFileRecords.get(key)), partitionName); - metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BASEFILE_READ_STR, readTimer.endTimer())); - // merge base file record w/ log record if present - if (logRecords.containsKey(key) && logRecords.get(key).isPresent()) { - HoodieRecordPayload mergedPayload = logRecords.get(key).get().getData().preCombine(hoodieRecord.getData()); - result.add(Pair.of(key, Option.of(new HoodieAvroRecord(hoodieRecord.getKey(), mergedPayload)))); - } else { - // only base record 
- result.add(Pair.of(key, Option.of(hoodieRecord))); - } - } else { - // only log record - result.add(Pair.of(key, logRecords.get(key))); - } - } - timings.add(timer.endTimer()); - } else { - // no base file at all + + if (baseFileReader == null) { + // No base file at all timings.add(timer.endTimer()); - for (Map.Entry>> entry : logRecords.entrySet()) { - result.add(Pair.of(entry.getKey(), entry.getValue())); + if (fullKeys) { + // In case full-keys (not key-prefixes) were provided, it's expected that the list of + // records will contain an (optional) entry for each corresponding key + return keys.stream() + .map(key -> Pair.of(key, logRecords.getOrDefault(key, Option.empty()))) + .collect(Collectors.toList()); + } else { + return logRecords.entrySet().stream() + .map(entry -> Pair.of(entry.getKey(), entry.getValue())) + .collect(Collectors.toList()); } } - return result; + + List>>> result = new ArrayList<>(); + + HoodieTimer readTimer = new HoodieTimer(); + readTimer.startTimer(); + + Map> records = + fetchBaseFileRecordsByKeys(baseFileReader, keys, fullKeys, partitionName); + + metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BASEFILE_READ_STR, readTimer.endTimer())); + + // Iterate over all provided log-records, merging them into existing records + for (Option> logRecordOpt : logRecords.values()) { + if (logRecordOpt.isPresent()) { + HoodieRecord logRecord = logRecordOpt.get(); + records.merge( + logRecord.getRecordKey(), + logRecord, + (oldRecord, newRecord) -> + new HoodieAvroRecord<>(oldRecord.getKey(), newRecord.getData().preCombine(oldRecord.getData())) + ); + } + } + + timings.add(timer.endTimer()); + + if (fullKeys) { + // In case full-keys (not key-prefixes) were provided, it's expected that the list of + // records will contain an (optional) entry for each corresponding key + return keys.stream() + .map(key -> Pair.of(key, Option.ofNullable(records.get(key)))) + .collect(Collectors.toList()); + } else { + return records.values().stream() 
+ .map(record -> Pair.of(record.getRecordKey(), Option.of(record))) + .collect(Collectors.toList()); + } + } + + private Map> fetchBaseFileRecordsByKeys(HoodieFileReader baseFileReader, + List keys, + boolean fullKeys, + String partitionName) throws IOException { + ClosableIterator records = fullKeys ? baseFileReader.getRecordsByKeysIterator(keys) + : baseFileReader.getRecordsByKeyPrefixIterator(keys); + + return toStream(records) + .map(record -> Pair.of( + (String) record.get(HoodieMetadataPayload.KEY_FIELD_NAME), + composeRecord(record, partitionName))) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); } - private HoodieRecord getRecord(Option baseRecord, String partitionName) { - ValidationUtils.checkState(baseRecord.isPresent()); + private HoodieRecord composeRecord(GenericRecord avroRecord, String partitionName) { if (metadataTableConfig.populateMetaFields()) { - return SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(), + return SpillableMapUtils.convertToHoodieRecordPayload(avroRecord, metadataTableConfig.getPayloadClass(), metadataTableConfig.getPreCombineField(), false); } - return SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(), + return SpillableMapUtils.convertToHoodieRecordPayload(avroRecord, metadataTableConfig.getPayloadClass(), metadataTableConfig.getPreCombineField(), Pair.of(metadataTableConfig.getRecordKeyFieldProp(), metadataTableConfig.getPartitionFieldProp()), false, Option.of(partitionName)); @@ -263,10 +371,12 @@ private Map, List> getPartitionFileSliceToKeysMa Map, List> partitionFileSliceToKeysMap = new HashMap<>(); for (String key : keys) { - final FileSlice slice = latestFileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(key, - latestFileSlices.size())); - final Pair partitionNameFileSlicePair = Pair.of(partitionName, slice); - partitionFileSliceToKeysMap.computeIfAbsent(partitionNameFileSlicePair, k -> new ArrayList<>()).add(key); + if (!isNullOrEmpty(latestFileSlices)) { + final 
FileSlice slice = latestFileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(key, + latestFileSlices.size())); + final Pair partitionNameFileSlicePair = Pair.of(partitionName, slice); + partitionFileSliceToKeysMap.computeIfAbsent(partitionNameFileSlicePair, k -> new ArrayList<>()).add(key); + } } return partitionFileSliceToKeysMap; } @@ -275,34 +385,35 @@ private Map, List> getPartitionFileSliceToKeysMa * Create a file reader and the record scanner for a given partition and file slice * if readers are not already available. * - * @param partitionName - Partition name - * @param slice - The file slice to open readers for + * @param partitionName - Partition name + * @param slice - The file slice to open readers for * @return File reader and the record scanner pair for the requested file slice */ - private Pair openReadersIfNeeded(String partitionName, FileSlice slice) { - return partitionReaders.computeIfAbsent(Pair.of(partitionName, slice.getFileId()), k -> { - try { - HoodieTimer timer = new HoodieTimer().startTimer(); - - // Open base file reader - Pair baseFileReaderOpenTimePair = getBaseFileReader(slice, timer); - HoodieFileReader baseFileReader = baseFileReaderOpenTimePair.getKey(); - final long baseFileOpenMs = baseFileReaderOpenTimePair.getValue(); - - // Open the log record scanner using the log files from the latest file slice - List logFiles = slice.getLogFiles().collect(Collectors.toList()); - Pair logRecordScannerOpenTimePair = - getLogRecordScanner(logFiles, partitionName); - HoodieMetadataMergedLogRecordReader logRecordScanner = logRecordScannerOpenTimePair.getKey(); - final long logScannerOpenMs = logRecordScannerOpenTimePair.getValue(); - - metrics.ifPresent(metrics -> metrics.updateMetrics(HoodieMetadataMetrics.SCAN_STR, - +baseFileOpenMs + logScannerOpenMs)); - return Pair.of(baseFileReader, logRecordScanner); - } catch (IOException e) { - throw new HoodieIOException("Error opening readers for metadata table partition " + 
partitionName, e); - } - }); + private Pair getOrCreateReaders(String partitionName, FileSlice slice) { + return partitionReaders.computeIfAbsent(Pair.of(partitionName, slice.getFileId()), k -> openReaders(partitionName, slice)); + } + + private Pair openReaders(String partitionName, FileSlice slice) { + try { + HoodieTimer timer = new HoodieTimer().startTimer(); + // Open base file reader + Pair baseFileReaderOpenTimePair = getBaseFileReader(slice, timer); + HoodieFileReader baseFileReader = baseFileReaderOpenTimePair.getKey(); + final long baseFileOpenMs = baseFileReaderOpenTimePair.getValue(); + + // Open the log record scanner using the log files from the latest file slice + List logFiles = slice.getLogFiles().collect(Collectors.toList()); + Pair logRecordScannerOpenTimePair = + getLogRecordScanner(logFiles, partitionName); + HoodieMetadataMergedLogRecordReader logRecordScanner = logRecordScannerOpenTimePair.getKey(); + final long logScannerOpenMs = logRecordScannerOpenTimePair.getValue(); + + metrics.ifPresent(metrics -> metrics.updateMetrics(HoodieMetadataMetrics.SCAN_STR, + +baseFileOpenMs + logScannerOpenMs)); + return Pair.of(baseFileReader, logRecordScanner); + } catch (IOException e) { + throw new HoodieIOException("Error opening readers for metadata table partition " + partitionName, e); + } } private Pair getBaseFileReader(FileSlice slice, HoodieTimer timer) throws IOException { @@ -345,7 +456,14 @@ private Set getValidInstantTimestamps() { return validInstantTimestamps; } - public Pair getLogRecordScanner(List logFiles, String partitionName) { + public Pair getLogRecordScanner(List logFiles, + String partitionName) { + return getLogRecordScanner(logFiles, partitionName, Option.empty()); + } + + public Pair getLogRecordScanner(List logFiles, + String partitionName, + Option allowFullScanOverride) { HoodieTimer timer = new HoodieTimer().startTimer(); List sortedLogFilePaths = logFiles.stream() .sorted(HoodieLogFile.getLogFileComparator()) @@ -359,6 
+477,8 @@ public Pair getLogRecordScanner(List< Option latestMetadataInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant(); String latestMetadataInstantTime = latestMetadataInstant.map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP); + boolean allowFullScan = allowFullScanOverride.orElseGet(() -> isFullScanAllowedForPartition(partitionName)); + // Load the schema Schema schema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema()); HoodieCommonConfig commonConfig = HoodieCommonConfig.newBuilder().fromProperties(metadataConfig.getProps()).build(); @@ -374,7 +494,7 @@ public Pair getLogRecordScanner(List< .withDiskMapType(commonConfig.getSpillableDiskMapType()) .withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled()) .withLogBlockTimestamps(validInstantTimestamps) - .enableFullScan(metadataConfig.enableFullScan()) + .allowFullScan(allowFullScan) .withPartition(partitionName) .build(); @@ -384,6 +504,21 @@ public Pair getLogRecordScanner(List< return Pair.of(logRecordScanner, logScannerOpenMs); } + // NOTE: We're allowing eager full-scan of the log-files only for "files" partition. + // Other partitions (like "column_stats", "bloom_filters") will have to be fetched + // t/h point-lookups + private boolean isFullScanAllowedForPartition(String partitionName) { + switch (partitionName) { + case PARTITION_NAME_FILES: + return metadataConfig.allowFullScan(); + + case PARTITION_NAME_COLUMN_STATS: + case PARTITION_NAME_BLOOM_FILTERS: + default: + return false; + } + } + /** * Returns a list of commits which were rolled back as part of a Rollback or Restore operation. 
* @@ -429,6 +564,10 @@ public void close() { private synchronized void close(Pair partitionFileSlicePair) { Pair readers = partitionReaders.remove(partitionFileSlicePair); + closeReader(readers); + } + + private void closeReader(Pair readers) { if (readers != null) { try { if (readers.getKey() != null) { diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMergedLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMergedLogRecordReader.java index 4f616c362fbf6..cbd7e6c17511c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMergedLogRecordReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMergedLogRecordReader.java @@ -18,11 +18,13 @@ package org.apache.hudi.metadata; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.FileSystem; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieAvroRecord; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner; import org.apache.hudi.common.table.log.InstantRange; @@ -30,19 +32,17 @@ import org.apache.hudi.common.util.SpillableMapUtils; import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.common.util.collection.Pair; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.FileSystem; +import org.apache.hudi.internal.schema.InternalSchema; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import java.io.IOException; -import java.util.ArrayList; import java.util.Collections; import 
java.util.List; +import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.util.ValidationUtils.checkState; /** * A {@code HoodieMergedLogRecordScanner} implementation which only merged records matching providing keys. This is @@ -52,38 +52,16 @@ public class HoodieMetadataMergedLogRecordReader extends HoodieMergedLogRecordSc private static final Logger LOG = LogManager.getLogger(HoodieMetadataMergedLogRecordReader.class); - // Set of all record keys that are to be read in memory - private Set mergeKeyFilter; - private HoodieMetadataMergedLogRecordReader(FileSystem fs, String basePath, String partitionName, List logFilePaths, Schema readerSchema, String latestInstantTime, Long maxMemorySizeInBytes, int bufferSize, - String spillableMapBasePath, Set mergeKeyFilter, + String spillableMapBasePath, ExternalSpillableMap.DiskMapType diskMapType, boolean isBitCaskDiskMapCompressionEnabled, - Option instantRange, boolean enableFullScan) { - super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, maxMemorySizeInBytes, false, false, bufferSize, - spillableMapBasePath, instantRange, false, diskMapType, isBitCaskDiskMapCompressionEnabled, false, - enableFullScan, Option.of(partitionName)); - this.mergeKeyFilter = mergeKeyFilter; - if (enableFullScan) { - performScan(); - } - } - - @Override - protected void processNextRecord(HoodieRecord hoodieRecord) throws IOException { - if (mergeKeyFilter.isEmpty() || mergeKeyFilter.contains(hoodieRecord.getRecordKey())) { - super.processNextRecord(hoodieRecord); - } - } - - @Override - protected void processNextDeletedKey(HoodieKey hoodieKey) { - if (mergeKeyFilter.isEmpty() || mergeKeyFilter.contains(hoodieKey.getRecordKey())) { - super.processNextDeletedKey(hoodieKey); - } + Option instantRange, boolean allowFullScan) { + super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, maxMemorySizeInBytes, true, false, bufferSize, + spillableMapBasePath, 
instantRange, diskMapType, isBitCaskDiskMapCompressionEnabled, false, allowFullScan, Option.of(partitionName), InternalSchema.getEmptyInternalSchema()); } @Override @@ -117,24 +95,37 @@ public static HoodieMetadataMergedLogRecordReader.Builder newBuilder() { * @return {@code HoodieRecord} if key was found else {@code Option.empty()} */ public synchronized List>>> getRecordByKey(String key) { + checkState(forceFullScan, "Record reader has to be in full-scan mode to use this API"); return Collections.singletonList(Pair.of(key, Option.ofNullable((HoodieRecord) records.get(key)))); } + @SuppressWarnings("unchecked") + public List> getRecordsByKeyPrefixes(List keyPrefixes) { + // Following operations have to be atomic, otherwise concurrent + // readers would race with each other and could crash when + // processing log block records as part of scan. + synchronized (this) { + records.clear(); + scanInternal(Option.of(new KeySpec(keyPrefixes, false))); + return records.values().stream() + .filter(Objects::nonNull) + .map(record -> (HoodieRecord) record) + .collect(Collectors.toList()); + } + } + + @SuppressWarnings("unchecked") public synchronized List>>> getRecordsByKeys(List keys) { // Following operations have to be atomic, otherwise concurrent // readers would race with each other and could crash when // processing log block records as part of scan. 
- records.clear(); - scan(Option.of(keys)); - List>>> metadataRecords = new ArrayList<>(); - keys.forEach(entry -> { - if (records.containsKey(entry)) { - metadataRecords.add(Pair.of(entry, Option.ofNullable((HoodieRecord) records.get(entry)))); - } else { - metadataRecords.add(Pair.of(entry, Option.empty())); - } - }); - return metadataRecords; + synchronized (this) { + records.clear(); + scan(keys); + return keys.stream() + .map(key -> Pair.of(key, Option.ofNullable((HoodieRecord) records.get(key)))) + .collect(Collectors.toList()); + } } @Override @@ -146,9 +137,7 @@ protected String getKeyField() { * Builder used to build {@code HoodieMetadataMergedLogRecordScanner}. */ public static class Builder extends HoodieMergedLogRecordScanner.Builder { - private Set mergeKeyFilter = Collections.emptySet(); - private boolean enableFullScan = HoodieMetadataConfig.ENABLE_FULL_SCAN_LOG_FILES.defaultValue(); - private boolean enableInlineReading; + private boolean allowFullScan = HoodieMetadataConfig.ENABLE_FULL_SCAN_LOG_FILES.defaultValue(); @Override public Builder withFileSystem(FileSystem fs) { @@ -226,26 +215,21 @@ public Builder withBitCaskDiskMapCompressionEnabled(boolean isBitCaskDiskMapComp return this; } - public Builder withMergeKeyFilter(Set mergeKeyFilter) { - this.mergeKeyFilter = mergeKeyFilter; - return this; - } - public Builder withLogBlockTimestamps(Set validLogBlockTimestamps) { withInstantRange(Option.of(new ExplicitMatchRange(validLogBlockTimestamps))); return this; } - public Builder enableFullScan(boolean enableFullScan) { - this.enableFullScan = enableFullScan; + public Builder allowFullScan(boolean enableFullScan) { + this.allowFullScan = enableFullScan; return this; } @Override public HoodieMetadataMergedLogRecordReader build() { return new HoodieMetadataMergedLogRecordReader(fs, basePath, partitionName, logFilePaths, readerSchema, - latestInstantTime, maxMemorySizeInBytes, bufferSize, spillableMapBasePath, mergeKeyFilter, - diskMapType, 
isBitCaskDiskMapCompressionEnabled, instantRange, enableFullScan); + latestInstantTime, maxMemorySizeInBytes, bufferSize, spillableMapBasePath, + diskMapType, isBitCaskDiskMapCompressionEnabled, instantRange, allowFullScan); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java index f7d45db8b9a3f..58d186f971cb8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java @@ -18,10 +18,30 @@ package org.apache.hudi.metadata; +import org.apache.avro.Conversions; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.util.Utf8; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.avro.model.BooleanWrapper; +import org.apache.hudi.avro.model.BytesWrapper; +import org.apache.hudi.avro.model.DateWrapper; +import org.apache.hudi.avro.model.DecimalWrapper; +import org.apache.hudi.avro.model.DoubleWrapper; +import org.apache.hudi.avro.model.FloatWrapper; import org.apache.hudi.avro.model.HoodieMetadataBloomFilter; import org.apache.hudi.avro.model.HoodieMetadataColumnStats; import org.apache.hudi.avro.model.HoodieMetadataFileInfo; import org.apache.hudi.avro.model.HoodieMetadataRecord; +import org.apache.hudi.avro.model.IntWrapper; +import org.apache.hudi.avro.model.LongWrapper; +import org.apache.hudi.avro.model.StringWrapper; +import org.apache.hudi.avro.model.TimestampMicrosWrapper; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieColumnRangeMetadata; @@ -35,30 +55,33 @@ import 
org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.io.storage.HoodieHFileReader; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - import java.io.IOException; +import java.math.BigDecimal; import java.nio.ByteBuffer; +import java.sql.Date; +import java.sql.Timestamp; +import java.time.Instant; +import java.time.LocalDate; import java.util.Arrays; import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Properties; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; import static org.apache.hudi.TypeUtils.unsafeCast; +import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros; +import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; import static org.apache.hudi.common.util.ValidationUtils.checkState; import static org.apache.hudi.metadata.HoodieTableMetadata.RECORDKEY_PARTITION_LIST; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getPartitionIdentifier; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.tryUpcastDecimal; /** * MetadataTable records are persisted with the schema defined in HoodieMetadata.avsc. 
@@ -118,6 +141,8 @@ public class HoodieMetadataPayload implements HoodieRecordPayload filesystemMetadata = null; @@ -179,8 +204,8 @@ public HoodieMetadataPayload(Option recordOpt) { columnStatMetadata = HoodieMetadataColumnStats.newBuilder() .setFileName((String) columnStatsRecord.get(COLUMN_STATS_FIELD_FILE_NAME)) .setColumnName((String) columnStatsRecord.get(COLUMN_STATS_FIELD_COLUMN_NAME)) - .setMinValue((String) columnStatsRecord.get(COLUMN_STATS_FIELD_MIN_VALUE)) - .setMaxValue((String) columnStatsRecord.get(COLUMN_STATS_FIELD_MAX_VALUE)) + .setMinValue(columnStatsRecord.get(COLUMN_STATS_FIELD_MIN_VALUE)) + .setMaxValue(columnStatsRecord.get(COLUMN_STATS_FIELD_MAX_VALUE)) .setValueCount((Long) columnStatsRecord.get(COLUMN_STATS_FIELD_VALUE_COUNT)) .setNullCount((Long) columnStatsRecord.get(COLUMN_STATS_FIELD_NULL_COUNT)) .setTotalSize((Long) columnStatsRecord.get(COLUMN_STATS_FIELD_TOTAL_SIZE)) @@ -221,8 +246,34 @@ protected HoodieMetadataPayload(String key, int type, * @param partitions The list of partitions */ public static HoodieRecord createPartitionListRecord(List partitions) { + return createPartitionListRecord(partitions, false); + } + + /** + * Create and return a {@code HoodieMetadataPayload} to save list of partitions. + * + * @param partitions The list of partitions + */ + public static HoodieRecord createPartitionListRecord(List partitions, boolean isDeleted) { + Map fileInfo = new HashMap<>(); + partitions.forEach(partition -> fileInfo.put(getPartitionIdentifier(partition), new HoodieMetadataFileInfo(0L, isDeleted))); + + HoodieKey key = new HoodieKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.getPartitionPath()); + HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_PARTITION_LIST, + fileInfo); + return new HoodieAvroRecord<>(key, payload); + } + + /** + * Create and return a {@code HoodieMetadataPayload} to save list of partitions. 
+ * + * @param partitionsAdded The list of added partitions + * @param partitionsDeleted The list of deleted partitions + */ + public static HoodieRecord createPartitionListRecord(List partitionsAdded, List partitionsDeleted) { Map fileInfo = new HashMap<>(); - partitions.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, false))); + partitionsAdded.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, false))); + partitionsDeleted.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, true))); HoodieKey key = new HoodieKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.getPartitionPath()); HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_PARTITION_LIST, @@ -320,9 +371,11 @@ private HoodieMetadataBloomFilter combineBloomFilterMetadata(HoodieMetadataPaylo private HoodieMetadataColumnStats combineColumnStatsMetadata(HoodieMetadataPayload previousRecord) { checkArgument(previousRecord.getColumnStatMetadata().isPresent()); checkArgument(getColumnStatMetadata().isPresent()); - checkArgument(previousRecord.getColumnStatMetadata().get() - .getFileName().equals(this.columnStatMetadata.getFileName())); - return HoodieTableMetadataUtil.mergeColumnStats(previousRecord.getColumnStatMetadata().get(), this.columnStatMetadata); + + HoodieMetadataColumnStats previousColStatsRecord = previousRecord.getColumnStatMetadata().get(); + HoodieMetadataColumnStats newColumnStatsRecord = getColumnStatMetadata().get(); + + return mergeColumnStatsRecords(previousColStatsRecord, newColumnStatsRecord); } @Override @@ -338,7 +391,7 @@ public Option combineAndGetUpdateValue(IndexedRecord oldRecord, S } @Override - public Option getInsertValue(Schema schema, Properties properties) throws IOException { + public Option getInsertValue(Schema schemaIgnored, Properties propertiesIgnored) throws IOException { if (key == null) { return Option.empty(); } @@ -502,29 +555,69 @@ public static 
String getColumnStatsIndexKey(String partitionName, HoodieColumnRa return getColumnStatsIndexKey(partitionIndexID, fileIndexID, columnIndexID); } - public static Stream createColumnStatsRecords( - String partitionName, Collection> columnRangeMetadataList, boolean isDeleted) { + public static Stream createColumnStatsRecords(String partitionName, + Collection> columnRangeMetadataList, + boolean isDeleted) { return columnRangeMetadataList.stream().map(columnRangeMetadata -> { HoodieKey key = new HoodieKey(getColumnStatsIndexKey(partitionName, columnRangeMetadata), MetadataPartitionType.COLUMN_STATS.getPartitionPath()); + HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), HoodieMetadataColumnStats.newBuilder() .setFileName(new Path(columnRangeMetadata.getFilePath()).getName()) .setColumnName(columnRangeMetadata.getColumnName()) - .setMinValue(columnRangeMetadata.getMinValue() == null ? null : - columnRangeMetadata.getMinValue().toString()) - .setMaxValue(columnRangeMetadata.getMaxValue() == null ? 
null : - columnRangeMetadata.getMaxValue().toString()) + .setMinValue(wrapStatisticValue(columnRangeMetadata.getMinValue())) + .setMaxValue(wrapStatisticValue(columnRangeMetadata.getMaxValue())) .setNullCount(columnRangeMetadata.getNullCount()) .setValueCount(columnRangeMetadata.getValueCount()) .setTotalSize(columnRangeMetadata.getTotalSize()) .setTotalUncompressedSize(columnRangeMetadata.getTotalUncompressedSize()) .setIsDeleted(isDeleted) .build()); + return new HoodieAvroRecord<>(key, payload); }); } + @SuppressWarnings({"rawtypes", "unchecked"}) + private static HoodieMetadataColumnStats mergeColumnStatsRecords(HoodieMetadataColumnStats prevColumnStats, + HoodieMetadataColumnStats newColumnStats) { + checkArgument(Objects.equals(prevColumnStats.getFileName(), newColumnStats.getFileName())); + checkArgument(Objects.equals(prevColumnStats.getColumnName(), newColumnStats.getColumnName())); + + if (newColumnStats.getIsDeleted()) { + return newColumnStats; + } + + Comparable minValue = + (Comparable) Stream.of( + (Comparable) unwrapStatisticValueWrapper(prevColumnStats.getMinValue()), + (Comparable) unwrapStatisticValueWrapper(newColumnStats.getMinValue())) + .filter(Objects::nonNull) + .min(Comparator.naturalOrder()) + .orElse(null); + + Comparable maxValue = + (Comparable) Stream.of( + (Comparable) unwrapStatisticValueWrapper(prevColumnStats.getMaxValue()), + (Comparable) unwrapStatisticValueWrapper(newColumnStats.getMaxValue())) + .filter(Objects::nonNull) + .max(Comparator.naturalOrder()) + .orElse(null); + + return HoodieMetadataColumnStats.newBuilder() + .setFileName(newColumnStats.getFileName()) + .setColumnName(newColumnStats.getColumnName()) + .setMinValue(wrapStatisticValue(minValue)) + .setMaxValue(wrapStatisticValue(maxValue)) + .setValueCount(prevColumnStats.getValueCount() + newColumnStats.getValueCount()) + .setNullCount(prevColumnStats.getNullCount() + newColumnStats.getNullCount()) + .setTotalSize(prevColumnStats.getTotalSize() + 
newColumnStats.getTotalSize()) + .setTotalUncompressedSize(prevColumnStats.getTotalUncompressedSize() + newColumnStats.getTotalUncompressedSize()) + .setIsDeleted(newColumnStats.getIsDeleted()) + .build(); + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("HoodieMetadataPayload {"); @@ -550,6 +643,85 @@ public String toString() { return sb.toString(); } + private static Object wrapStatisticValue(Comparable statValue) { + if (statValue == null) { + return null; + } else if (statValue instanceof Date || statValue instanceof LocalDate) { + // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't + // rely on logical types to do proper encoding of the native Java types, + // and hereby have to encode statistic manually + LocalDate localDate = statValue instanceof LocalDate + ? (LocalDate) statValue + : ((Date) statValue).toLocalDate(); + return DateWrapper.newBuilder().setValue((int) localDate.toEpochDay()).build(); + } else if (statValue instanceof BigDecimal) { + Schema valueSchema = DecimalWrapper.SCHEMA$.getField("value").schema(); + BigDecimal upcastDecimal = tryUpcastDecimal((BigDecimal) statValue, (LogicalTypes.Decimal) valueSchema.getLogicalType()); + return DecimalWrapper.newBuilder() + .setValue(AVRO_DECIMAL_CONVERSION.toBytes(upcastDecimal, valueSchema, valueSchema.getLogicalType())) + .build(); + } else if (statValue instanceof Timestamp) { + // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't + // rely on logical types to do proper encoding of the native Java types, + // and hereby have to encode statistic manually + Instant instant = ((Timestamp) statValue).toInstant(); + return TimestampMicrosWrapper.newBuilder() + .setValue(instantToMicros(instant)) + .build(); + } else if (statValue instanceof Boolean) { + return BooleanWrapper.newBuilder().setValue((Boolean) statValue).build(); + } else if (statValue instanceof Integer) { + return 
IntWrapper.newBuilder().setValue((Integer) statValue).build(); + } else if (statValue instanceof Long) { + return LongWrapper.newBuilder().setValue((Long) statValue).build(); + } else if (statValue instanceof Float) { + return FloatWrapper.newBuilder().setValue((Float) statValue).build(); + } else if (statValue instanceof Double) { + return DoubleWrapper.newBuilder().setValue((Double) statValue).build(); + } else if (statValue instanceof ByteBuffer) { + return BytesWrapper.newBuilder().setValue((ByteBuffer) statValue).build(); + } else if (statValue instanceof String || statValue instanceof Utf8) { + return StringWrapper.newBuilder().setValue(statValue.toString()).build(); + } else { + throw new UnsupportedOperationException(String.format("Unsupported type of the statistic (%s)", statValue.getClass())); + } + } + + public static Comparable unwrapStatisticValueWrapper(Object statValueWrapper) { + if (statValueWrapper == null) { + return null; + } else if (statValueWrapper instanceof DateWrapper) { + return LocalDate.ofEpochDay(((DateWrapper) statValueWrapper).getValue()); + } else if (statValueWrapper instanceof DecimalWrapper) { + Schema valueSchema = DecimalWrapper.SCHEMA$.getField("value").schema(); + return AVRO_DECIMAL_CONVERSION.fromBytes(((DecimalWrapper) statValueWrapper).getValue(), valueSchema, valueSchema.getLogicalType()); + } else if (statValueWrapper instanceof TimestampMicrosWrapper) { + return microsToInstant(((TimestampMicrosWrapper) statValueWrapper).getValue()); + } else if (statValueWrapper instanceof BooleanWrapper) { + return ((BooleanWrapper) statValueWrapper).getValue(); + } else if (statValueWrapper instanceof IntWrapper) { + return ((IntWrapper) statValueWrapper).getValue(); + } else if (statValueWrapper instanceof LongWrapper) { + return ((LongWrapper) statValueWrapper).getValue(); + } else if (statValueWrapper instanceof FloatWrapper) { + return ((FloatWrapper) statValueWrapper).getValue(); + } else if (statValueWrapper instanceof 
DoubleWrapper) { + return ((DoubleWrapper) statValueWrapper).getValue(); + } else if (statValueWrapper instanceof BytesWrapper) { + return ((BytesWrapper) statValueWrapper).getValue(); + } else if (statValueWrapper instanceof StringWrapper) { + return ((StringWrapper) statValueWrapper).getValue(); + } else if (statValueWrapper instanceof GenericRecord) { + // NOTE: This branch could be hit b/c Avro records could be reconstructed + // as {@code GenericRecord) + // TODO add logical type decoding + GenericRecord record = (GenericRecord) statValueWrapper; + return (Comparable) record.get("value"); + } else { + throw new UnsupportedOperationException(String.format("Unsupported type of the statistic (%s)", statValueWrapper.getClass())); + } + } + private static void validatePayload(int type, Map filesystemMetadata) { if (type == METADATA_TYPE_FILE_LIST) { filesystemMetadata.forEach((fileName, fileInfo) -> { diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java index 665eff3be407a..a059b5784556c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java @@ -24,9 +24,12 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.SerializableConfiguration; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.Option; + import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieMetadataException; @@ -36,6 +39,7 @@ import java.util.Map; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; +import static 
org.apache.hudi.common.util.ValidationUtils.checkState; /** * Interface that supports querying various pieces of metadata about a hudi table. @@ -72,6 +76,17 @@ static String getDataTableBasePathFromMetadataTable(String metadataTableBasePath return metadataTableBasePath.substring(0, metadataTableBasePath.lastIndexOf(HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH) - 1); } + /** + * Return the base path of the dataset. + * + * @param metadataTableBasePath The base path of the metadata table + */ + static String getDatasetBasePath(String metadataTableBasePath) { + int endPos = metadataTableBasePath.lastIndexOf(Path.SEPARATOR + HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH); + checkState(endPos != -1, metadataTableBasePath + " should be base path of the metadata table"); + return metadataTableBasePath.substring(0, endPos); + } + /** * Returns {@code True} if the given path contains a metadata table. * @@ -146,6 +161,17 @@ Map, BloomFilter> getBloomFilters(final List, HoodieMetadataColumnStats> getColumnStats(final List> partitionNameFileNameList, final String columnName) throws HoodieMetadataException; + /** + * Fetch records by key prefixes. Key prefix passed is expected to match the same prefix as stored in Metadata table partitions. For eg, in case of col stats partition, + * actual keys in metadata partition is encoded values of column name, partition name and file name. So, key prefixes passed to this method is expected to be encoded already. + * + * @param keyPrefixes list of key prefixes for which interested records are looked up for. + * @param partitionName partition name in metadata table where the records are looked up for. + * @return {@link HoodieData} of {@link HoodieRecord}s with records matching the passed in key prefixes. + */ + HoodieData> getRecordsByKeyPrefixes(List keyPrefixes, + String partitionName); + /** * Get the instant time to which the metadata is synced w.r.t data timeline. 
*/ diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index 4390e8766c6aa..3904ff6f832c9 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -18,6 +18,7 @@ package org.apache.hudi.metadata; +import org.apache.hudi.avro.ConvertingGenericData; import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieMetadataColumnStats; import org.apache.hudi.avro.model.HoodieRestoreMetadata; @@ -33,7 +34,9 @@ import org.apache.hudi.common.model.HoodieDeltaWriteStat; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieReplaceCommitMetadata; import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.TableSchemaResolver; @@ -46,7 +49,6 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ParquetUtils; import org.apache.hudi.common.util.StringUtils; -import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; @@ -54,42 +56,46 @@ import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; +import org.apache.avro.AvroTypeException; +import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import 
org.apache.hudi.util.Lazy; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import javax.annotation.Nonnull; import java.io.IOException; +import java.math.BigDecimal; +import java.math.RoundingMode; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Objects; +import java.util.Set; import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.stream.Collector; import java.util.stream.Collectors; import java.util.stream.Stream; import static org.apache.hudi.avro.HoodieAvroUtils.addMetadataFields; -import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldValAsString; -import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.COLUMN_RANGE_MERGE_FUNCTION; -import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MAX; -import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MIN; -import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.NULL_COUNT; -import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_SIZE; -import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_UNCOMPRESSED_SIZE; -import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.VALUE_COUNT; +import static org.apache.hudi.avro.HoodieAvroUtils.convertValueForSpecificDataTypes; +import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldSchemaFromWriteSchema; +import static org.apache.hudi.avro.HoodieAvroUtils.resolveNullableSchema; import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty; +import static org.apache.hudi.common.util.ValidationUtils.checkState; +import static 
org.apache.hudi.metadata.HoodieMetadataPayload.unwrapStatisticValueWrapper; import static org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME; import static org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME; @@ -100,9 +106,103 @@ public class HoodieTableMetadataUtil { private static final Logger LOG = LogManager.getLogger(HoodieTableMetadataUtil.class); - protected static final String PARTITION_NAME_FILES = "files"; - protected static final String PARTITION_NAME_COLUMN_STATS = "column_stats"; - protected static final String PARTITION_NAME_BLOOM_FILTERS = "bloom_filters"; + public static final String PARTITION_NAME_FILES = "files"; + public static final String PARTITION_NAME_COLUMN_STATS = "column_stats"; + public static final String PARTITION_NAME_BLOOM_FILTERS = "bloom_filters"; + + /** + * Collects {@link HoodieColumnRangeMetadata} for the provided collection of records, pretending + * as if provided records have been persisted w/in given {@code filePath} + * + * @param records target records to compute column range metadata for + * @param targetFields columns (fields) to be collected + * @param filePath file path value required for {@link HoodieColumnRangeMetadata} + * + * @return map of {@link HoodieColumnRangeMetadata} for each of the provided target fields for + * the collection of provided records + */ + public static Map> collectColumnRangeMetadata(List records, + List targetFields, + String filePath) { + // Helper class to calculate column stats + class ColumnStats { + Object minValue; + Object maxValue; + long nullCount; + long valueCount; + } + + HashMap allColumnStats = new HashMap<>(); + + // Collect stats for all columns by iterating through records while accounting + // corresponding stats + records.forEach((record) -> { + // For each column (field) we have to index update corresponding column stats + // with the values from this record + targetFields.forEach(field -> { + ColumnStats colStats = 
allColumnStats.computeIfAbsent(field.name(), (ignored) -> new ColumnStats()); + + GenericRecord genericRecord = (GenericRecord) record; + + final Object fieldVal = convertValueForSpecificDataTypes(field.schema(), genericRecord.get(field.name()), true); + final Schema fieldSchema = getNestedFieldSchemaFromWriteSchema(genericRecord.getSchema(), field.name()); + + if (fieldVal != null && canCompare(fieldSchema)) { + // Set the min value of the field + if (colStats.minValue == null + || ConvertingGenericData.INSTANCE.compare(fieldVal, colStats.minValue, fieldSchema) < 0) { + colStats.minValue = fieldVal; + } + + // Set the max value of the field + if (colStats.maxValue == null || ConvertingGenericData.INSTANCE.compare(fieldVal, colStats.maxValue, fieldSchema) > 0) { + colStats.maxValue = fieldVal; + } + + colStats.valueCount++; + } else { + colStats.nullCount++; + } + }); + }); + + Collector, ?, Map>> collector = + Collectors.toMap(colRangeMetadata -> colRangeMetadata.getColumnName(), Function.identity()); + + return (Map>) targetFields.stream() + .map(field -> { + ColumnStats colStats = allColumnStats.get(field.name()); + return HoodieColumnRangeMetadata.create( + filePath, + field.name(), + colStats == null ? null : coerceToComparable(field.schema(), colStats.minValue), + colStats == null ? null : coerceToComparable(field.schema(), colStats.maxValue), + colStats == null ? 0 : colStats.nullCount, + colStats == null ? 
0 : colStats.valueCount, + // NOTE: Size and compressed size statistics are set to 0 to make sure we're not + // mixing up those provided by Parquet with the ones from other encodings, + // since those are not directly comparable + 0, + 0 + ); + }) + .collect(collector); + } + + /** + * Converts instance of {@link HoodieMetadataColumnStats} to {@link HoodieColumnRangeMetadata} + */ + public static HoodieColumnRangeMetadata convertColumnStatsRecordToColumnRangeMetadata(HoodieMetadataColumnStats columnStats) { + return HoodieColumnRangeMetadata.create( + columnStats.getFileName(), + columnStats.getColumnName(), + unwrapStatisticValueWrapper(columnStats.getMinValue()), + unwrapStatisticValueWrapper(columnStats.getMaxValue()), + columnStats.getNullCount(), + columnStats.getValueCount(), + columnStats.getTotalSize(), + columnStats.getTotalUncompressedSize()); + } /** * Delete the metadata table for the dataset. This will be invoked during upgrade/downgrade operation during which @@ -113,12 +213,48 @@ public class HoodieTableMetadataUtil { * @param context instance of {@link HoodieEngineContext}. */ public static void deleteMetadataTable(String basePath, HoodieEngineContext context) { + final String metadataTablePathStr = HoodieTableMetadata.getMetadataTableBasePath(basePath); + FileSystem fs = FSUtils.getFs(metadataTablePathStr, context.getHadoopConf().get()); + try { + Path metadataTablePath = new Path(metadataTablePathStr); + if (fs.exists(metadataTablePath)) { + fs.delete(metadataTablePath, true); + } + } catch (Exception e) { + throw new HoodieMetadataException("Failed to remove metadata table from path " + metadataTablePathStr, e); + } + } + + /** + * Deletes the metadata partition from the file system. 
+ * + * @param basePath - base path of the dataset + * @param context - instance of {@link HoodieEngineContext} + * @param partitionType - {@link MetadataPartitionType} of the partition to delete + */ + public static void deleteMetadataPartition(String basePath, HoodieEngineContext context, MetadataPartitionType partitionType) { final String metadataTablePath = HoodieTableMetadata.getMetadataTableBasePath(basePath); FileSystem fs = FSUtils.getFs(metadataTablePath, context.getHadoopConf().get()); try { - fs.delete(new Path(metadataTablePath), true); + fs.delete(new Path(metadataTablePath, partitionType.getPartitionPath()), true); } catch (Exception e) { - throw new HoodieMetadataException("Failed to remove metadata table from path " + metadataTablePath, e); + throw new HoodieMetadataException(String.format("Failed to remove metadata partition %s from path %s", partitionType, metadataTablePath), e); + } + } + + /** + * Check if the given metadata partition exists. + * + * @param basePath base path of the dataset + * @param context instance of {@link HoodieEngineContext}. 
+ */ + public static boolean metadataPartitionExists(String basePath, HoodieEngineContext context, MetadataPartitionType partitionType) { + final String metadataTablePath = HoodieTableMetadata.getMetadataTableBasePath(basePath); + FileSystem fs = FSUtils.getFs(metadataTablePath, context.getHadoopConf().get()); + try { + return fs.exists(new Path(metadataTablePath, partitionType.getPartitionPath())); + } catch (Exception e) { + throw new HoodieIOException(String.format("Failed to check metadata partition %s exists.", partitionType.getPartitionPath())); } } @@ -161,10 +297,13 @@ public static List convertMetadataToFilesPartitionRecords(HoodieCo String instantTime) { List records = new ArrayList<>(commitMetadata.getPartitionToWriteStats().size()); - // Add record bearing partitions list - ArrayList partitionsList = new ArrayList<>(commitMetadata.getPartitionToWriteStats().keySet()); + // Add record bearing added partitions list + List partitionsAdded = getPartitionsAdded(commitMetadata); - records.add(HoodieMetadataPayload.createPartitionListRecord(partitionsList)); + // Add record bearing deleted partitions list + List partitionsDeleted = getPartitionsDeleted(commitMetadata); + + records.add(HoodieMetadataPayload.createPartitionListRecord(partitionsAdded, partitionsDeleted)); // Update files listing records for each individual partition List> updatedPartitionFilesRecords = @@ -174,7 +313,7 @@ public static List convertMetadataToFilesPartitionRecords(HoodieCo String partitionStatName = entry.getKey(); List writeStats = entry.getValue(); - String partition = getPartition(partitionStatName); + String partition = getPartitionIdentifier(partitionStatName); HashMap updatedFilesToSizesMapping = writeStats.stream().reduce(new HashMap<>(writeStats.size()), @@ -214,6 +353,28 @@ public static List convertMetadataToFilesPartitionRecords(HoodieCo return records; } + private static List getPartitionsAdded(HoodieCommitMetadata commitMetadata) { + return 
commitMetadata.getPartitionToWriteStats().keySet().stream() + // We need to make sure we properly handle case of non-partitioned tables + .map(HoodieTableMetadataUtil::getPartitionIdentifier) + .collect(Collectors.toList()); + } + + private static List getPartitionsDeleted(HoodieCommitMetadata commitMetadata) { + if (commitMetadata instanceof HoodieReplaceCommitMetadata + && WriteOperationType.DELETE_PARTITION.equals(commitMetadata.getOperationType())) { + Map> partitionToReplaceFileIds = + ((HoodieReplaceCommitMetadata) commitMetadata).getPartitionToReplaceFileIds(); + + return partitionToReplaceFileIds.keySet().stream() + // We need to make sure we properly handle case of non-partitioned tables + .map(HoodieTableMetadataUtil::getPartitionIdentifier) + .collect(Collectors.toList()); + } + + return Collections.emptyList(); + } + /** * Convert commit action metadata to bloom filter records. * @@ -285,21 +446,24 @@ public static HoodieData convertMetadataToBloomFilterRecords( /** * Convert the clean action to metadata records. 
*/ - public static Map> convertMetadataToRecords( - HoodieEngineContext engineContext, HoodieCleanMetadata cleanMetadata, - MetadataRecordsGenerationParams recordsGenerationParams, String instantTime) { + public static Map> convertMetadataToRecords(HoodieEngineContext engineContext, + HoodieCleanMetadata cleanMetadata, + MetadataRecordsGenerationParams recordsGenerationParams, + String instantTime) { final Map> partitionToRecordsMap = new HashMap<>(); final HoodieData filesPartitionRecordsRDD = engineContext.parallelize( convertMetadataToFilesPartitionRecords(cleanMetadata, instantTime), 1); partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecordsRDD); if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.BLOOM_FILTERS)) { - final HoodieData metadataBloomFilterRecordsRDD = convertMetadataToBloomFilterRecords(cleanMetadata, engineContext, instantTime, recordsGenerationParams); + final HoodieData metadataBloomFilterRecordsRDD = + convertMetadataToBloomFilterRecords(cleanMetadata, engineContext, instantTime, recordsGenerationParams); partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, metadataBloomFilterRecordsRDD); } if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.COLUMN_STATS)) { - final HoodieData metadataColumnStatsRDD = convertMetadataToColumnStatsRecords(cleanMetadata, engineContext, recordsGenerationParams); + final HoodieData metadataColumnStatsRDD = + convertMetadataToColumnStatsRecords(cleanMetadata, engineContext, recordsGenerationParams); partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD); } @@ -317,8 +481,9 @@ public static List convertMetadataToFilesPartitionRecords(HoodieCl String instantTime) { List records = new LinkedList<>(); int[] fileDeleteCount = {0}; + List deletedPartitions = new ArrayList<>(); cleanMetadata.getPartitionMetadata().forEach((partitionName, partitionMetadata) -> { - final String partition = 
getPartition(partitionName); + final String partition = getPartitionIdentifier(partitionName); // Files deleted from a partition List deletedFiles = partitionMetadata.getDeletePathPatterns(); HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partition, Option.empty(), @@ -326,10 +491,18 @@ public static List convertMetadataToFilesPartitionRecords(HoodieCl records.add(record); fileDeleteCount[0] += deletedFiles.size(); + boolean isPartitionDeleted = partitionMetadata.getIsPartitionDeleted(); + if (isPartitionDeleted) { + deletedPartitions.add(partitionName); + } }); + if (!deletedPartitions.isEmpty()) { + // if there are partitions to be deleted, add them to delete list + records.add(HoodieMetadataPayload.createPartitionListRecord(deletedPartitions, true)); + } LOG.info("Updating at " + instantTime + " from Clean. #partitions_updated=" + records.size() - + ", #files_deleted=" + fileDeleteCount[0]); + + ", #files_deleted=" + fileDeleteCount[0] + ", #partitions_deleted=" + deletedPartitions.size()); return records; } @@ -385,8 +558,9 @@ public static HoodieData convertMetadataToColumnStatsRecords(Hoodi HoodieTableMetaClient dataTableMetaClient = recordsGenerationParams.getDataMetaClient(); - List columnsToIndex = getColumnsToIndex(recordsGenerationParams, - dataTableMetaClient.getTableConfig(), tryResolveSchemaForTable(dataTableMetaClient)); + List columnsToIndex = + getColumnsToIndex(recordsGenerationParams, + Lazy.lazily(() -> tryResolveSchemaForTable(dataTableMetaClient))); if (columnsToIndex.isEmpty()) { // In case there are no columns to index, bail @@ -396,8 +570,11 @@ public static HoodieData convertMetadataToColumnStatsRecords(Hoodi int parallelism = Math.max(Math.min(deleteFileList.size(), recordsGenerationParams.getColumnStatsIndexParallelism()), 1); return engineContext.parallelize(deleteFileList, parallelism) .flatMap(deleteFileInfoPair -> { - if (deleteFileInfoPair.getRight().endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { - 
return getColumnStats(deleteFileInfoPair.getLeft(), deleteFileInfoPair.getRight(), dataTableMetaClient, columnsToIndex, true).iterator(); + String partitionPath = deleteFileInfoPair.getLeft(); + String filePath = deleteFileInfoPair.getRight(); + + if (filePath.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { + return getColumnStatsRecords(partitionPath, filePath, dataTableMetaClient, columnsToIndex, true).iterator(); } return Collections.emptyListIterator(); }); @@ -425,7 +602,8 @@ public static Map> convertMetada } if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.COLUMN_STATS)) { - final HoodieData metadataColumnStatsRDD = convertFilesToColumnStatsRecords(engineContext, partitionToDeletedFiles, partitionToAppendedFiles, recordsGenerationParams); + final HoodieData metadataColumnStatsRDD = + convertFilesToColumnStatsRecords(engineContext, partitionToDeletedFiles, partitionToAppendedFiles, recordsGenerationParams); partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD); } return partitionToRecordsMap; @@ -470,7 +648,8 @@ public static Map> convertMetada } if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.COLUMN_STATS)) { - final HoodieData metadataColumnStatsRDD = convertFilesToColumnStatsRecords(engineContext, partitionToDeletedFiles, partitionToAppendedFiles, recordsGenerationParams); + final HoodieData metadataColumnStatsRDD = + convertFilesToColumnStatsRecords(engineContext, partitionToDeletedFiles, partitionToAppendedFiles, recordsGenerationParams); partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD); } @@ -531,14 +710,35 @@ private static void processRollbackMetadata(HoodieActiveTimeline metadataTableTi } // Case 2: The instant-to-rollback was never committed to Metadata Table. 
This can happen if the instant-to-rollback - // was a failed commit (never completed) as only completed instants are synced to Metadata Table. - // But the required Metadata Table instants should not have been archived + // was a failed commit (never completed). + // + // There are two cases for failed commit that we need to take care of: + // 1) The commit was synced to metadata table successfully but the dataset meta file switches state failed + // (from INFLIGHT to COMPLETED), the committed files should be rolled back thus the rollback metadata + // can not be skipped, usually a failover should be triggered and the metadata active timeline expects + // to contain the commit, we could check whether the commit was synced to metadata table + // through HoodieActiveTimeline#containsInstant. + // + // 2) The commit synced to metadata table failed or was never synced to metadata table, + // in this case, the rollback metadata should be skipped. + // + // And in which case, + // metadataTableTimeline.getCommitsTimeline().isBeforeTimelineStarts(syncedInstant.getTimestamp()) + // returns true ? + // It is most probably because of compaction rollback, we schedule a compaction plan early in the timeline (say t1) + // then after a long time schedule and execute the plan then try to rollback it. + // + // scheduled execution rollback compaction actions + // ----- t1 ----- t3 ----- t4 ----- dataset timeline + // + // ---------- t2 (archive) ----------- metadata timeline + // + // when at time t4, we commit the compaction rollback,the above check returns true. 
HoodieInstant syncedInstant = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, instantToRollback); if (metadataTableTimeline.getCommitsTimeline().isBeforeTimelineStarts(syncedInstant.getTimestamp())) { throw new HoodieMetadataException(String.format("The instant %s required to sync rollback of %s has been archived", syncedInstant, instantToRollback)); } - shouldSkip = !metadataTableTimeline.containsInstant(syncedInstant); if (!hasNonZeroRollbackLogFiles && shouldSkip) { LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, since this instant was never committed to Metadata Table", @@ -592,7 +792,7 @@ private static List convertFilesToFilesPartitionRecords(Map { fileChangeCount[0] += deletedFiles.size(); - final String partition = getPartition(partitionName); + final String partition = getPartitionIdentifier(partitionName); Option> filesAdded = Option.empty(); if (partitionToAppendedFiles.containsKey(partitionName)) { @@ -605,11 +805,11 @@ private static List convertFilesToFilesPartitionRecords(Map { - final String partition = getPartition(partitionName); + final String partition = getPartitionIdentifier(partitionName); fileChangeCount[1] += appendedFileMap.size(); // Validate that no appended file has been deleted - ValidationUtils.checkState( + checkState( !appendedFileMap.keySet().removeAll(partitionToDeletedFiles.getOrDefault(partition, Collections.emptyList())), "Rollback file cannot both be appended and deleted"); @@ -627,12 +827,9 @@ private static List convertFilesToFilesPartitionRecords(Map convertFilesToBloomFilterRecords(HoodieEn return Stream.empty(); } - final String partition = getPartition(partitionName); + final String partition = getPartitionIdentifier(partitionName); return Stream.of(HoodieMetadataPayload.createBloomFilterMetadataRecord( partition, deletedFile, instantTime, StringUtils.EMPTY_STRING, ByteBuffer.allocate(0), true)); }).iterator(); @@ -673,7 +870,7 @@ public static HoodieData 
convertFilesToBloomFilterRecords(HoodieEn HoodieData appendedFilesRecordsRDD = partitionToAppendedFilesRDD.flatMap(partitionToAppendedFilesPair -> { final String partitionName = partitionToAppendedFilesPair.getLeft(); final Map appendedFileMap = partitionToAppendedFilesPair.getRight(); - final String partition = getPartition(partitionName); + final String partition = getPartitionIdentifier(partitionName); return appendedFileMap.entrySet().stream().flatMap(appendedFileLengthPairEntry -> { final String appendedFile = appendedFileLengthPairEntry.getKey(); if (!FSUtils.isBaseFile(new Path(appendedFile))) { @@ -713,39 +910,47 @@ public static HoodieData convertFilesToColumnStatsRecords(HoodieEn HoodieData allRecordsRDD = engineContext.emptyHoodieData(); HoodieTableMetaClient dataTableMetaClient = recordsGenerationParams.getDataMetaClient(); - final List columnsToIndex = getColumnsToIndex(recordsGenerationParams, - dataTableMetaClient.getTableConfig(), tryResolveSchemaForTable(dataTableMetaClient)); + final List columnsToIndex = + getColumnsToIndex(recordsGenerationParams, + Lazy.lazily(() -> tryResolveSchemaForTable(dataTableMetaClient))); if (columnsToIndex.isEmpty()) { // In case there are no columns to index, bail return engineContext.emptyHoodieData(); } - final List>> partitionToDeletedFilesList = partitionToDeletedFiles.entrySet() - .stream().map(e -> Pair.of(e.getKey(), e.getValue())).collect(Collectors.toList()); - int parallelism = Math.max(Math.min(partitionToDeletedFilesList.size(), recordsGenerationParams.getColumnStatsIndexParallelism()), 1); - final HoodieData>> partitionToDeletedFilesRDD = engineContext.parallelize(partitionToDeletedFilesList, parallelism); + final List>> partitionToDeletedFilesList = partitionToDeletedFiles.entrySet().stream() + .map(e -> Pair.of(e.getKey(), e.getValue())) + .collect(Collectors.toList()); + + int deletedFilesTargetParallelism = Math.max(Math.min(partitionToDeletedFilesList.size(), 
recordsGenerationParams.getColumnStatsIndexParallelism()), 1); + final HoodieData>> partitionToDeletedFilesRDD = + engineContext.parallelize(partitionToDeletedFilesList, deletedFilesTargetParallelism); HoodieData deletedFilesRecordsRDD = partitionToDeletedFilesRDD.flatMap(partitionToDeletedFilesPair -> { - final String partitionName = partitionToDeletedFilesPair.getLeft(); - final String partition = getPartition(partitionName); + final String partitionPath = partitionToDeletedFilesPair.getLeft(); + final String partitionId = getPartitionIdentifier(partitionPath); final List deletedFileList = partitionToDeletedFilesPair.getRight(); return deletedFileList.stream().flatMap(deletedFile -> { - final String filePathWithPartition = partitionName + "/" + deletedFile; - return getColumnStats(partition, filePathWithPartition, dataTableMetaClient, columnsToIndex, true); + final String filePathWithPartition = partitionPath + "/" + deletedFile; + return getColumnStatsRecords(partitionId, filePathWithPartition, dataTableMetaClient, columnsToIndex, true); }).iterator(); }); + allRecordsRDD = allRecordsRDD.union(deletedFilesRecordsRDD); - final List>> partitionToAppendedFilesList = partitionToAppendedFiles.entrySet() - .stream().map(entry -> Pair.of(entry.getKey(), entry.getValue())).collect(Collectors.toList()); - parallelism = Math.max(Math.min(partitionToAppendedFilesList.size(), recordsGenerationParams.getColumnStatsIndexParallelism()), 1); - final HoodieData>> partitionToAppendedFilesRDD = engineContext.parallelize(partitionToAppendedFilesList, parallelism); + final List>> partitionToAppendedFilesList = partitionToAppendedFiles.entrySet().stream() + .map(entry -> Pair.of(entry.getKey(), entry.getValue())) + .collect(Collectors.toList()); + + int appendedFilesTargetParallelism = Math.max(Math.min(partitionToAppendedFilesList.size(), recordsGenerationParams.getColumnStatsIndexParallelism()), 1); + final HoodieData>> partitionToAppendedFilesRDD = + 
engineContext.parallelize(partitionToAppendedFilesList, appendedFilesTargetParallelism); HoodieData appendedFilesRecordsRDD = partitionToAppendedFilesRDD.flatMap(partitionToAppendedFilesPair -> { - final String partitionName = partitionToAppendedFilesPair.getLeft(); - final String partition = getPartition(partitionName); + final String partitionPath = partitionToAppendedFilesPair.getLeft(); + final String partitionId = getPartitionIdentifier(partitionPath); final Map appendedFileMap = partitionToAppendedFilesPair.getRight(); return appendedFileMap.entrySet().stream().flatMap(appendedFileNameLengthEntry -> { @@ -753,11 +958,11 @@ public static HoodieData convertFilesToColumnStatsRecords(HoodieEn || !appendedFileNameLengthEntry.getKey().endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { return Stream.empty(); } - final String filePathWithPartition = partitionName + "/" + appendedFileNameLengthEntry.getKey(); - return getColumnStats(partition, filePathWithPartition, dataTableMetaClient, columnsToIndex, false); + final String filePathWithPartition = partitionPath + "/" + appendedFileNameLengthEntry.getKey(); + return getColumnStatsRecords(partitionId, filePathWithPartition, dataTableMetaClient, columnsToIndex, false); }).iterator(); - }); + allRecordsRDD = allRecordsRDD.union(appendedFilesRecordsRDD); return allRecordsRDD; @@ -856,6 +1061,24 @@ private static List getPartitionFileSlices(HoodieTableMetaClient meta return fileSliceStream.sorted(Comparator.comparing(FileSlice::getFileId)).collect(Collectors.toList()); } + /** + * Get the latest file slices for a given partition including the inflight ones. 
+ * + * @param metaClient - instance of {@link HoodieTableMetaClient} + * @param fileSystemView - hoodie table file system view, which will be fetched from meta client if not already present + * @param partition - name of the partition whose file groups are to be loaded + * @return + */ + public static List getPartitionLatestFileSlicesIncludingInflight(HoodieTableMetaClient metaClient, + Option fileSystemView, + String partition) { + HoodieTableFileSystemView fsView = fileSystemView.orElse(getFileSystemView(metaClient)); + Stream fileSliceStream = fsView.fetchLatestFileSlicesIncludingInflight(partition); + return fileSliceStream + .sorted(Comparator.comparing(FileSlice::getFileId)) + .collect(Collectors.toList()); + } + public static HoodieData convertMetadataToColumnStatsRecords(HoodieCommitMetadata commitMetadata, HoodieEngineContext engineContext, MetadataRecordsGenerationParams recordsGenerationParams) { @@ -871,8 +1094,8 @@ public static HoodieData convertMetadataToColumnStatsRecords(Hoodi Option.ofNullable(commitMetadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY)) .flatMap(writerSchemaStr -> isNullOrEmpty(writerSchemaStr) - ? Option.empty() - : Option.of(new Schema.Parser().parse(writerSchemaStr))); + ? Option.empty() + : Option.of(new Schema.Parser().parse(writerSchemaStr))); HoodieTableMetaClient dataTableMetaClient = recordsGenerationParams.getDataMetaClient(); HoodieTableConfig tableConfig = dataTableMetaClient.getTableConfig(); @@ -882,7 +1105,7 @@ public static HoodieData convertMetadataToColumnStatsRecords(Hoodi tableConfig.populateMetaFields() ? addMetadataFields(schema) : schema); List columnsToIndex = getColumnsToIndex(recordsGenerationParams, - tableConfig, tableSchema); + Lazy.eagerly(tableSchema)); if (columnsToIndex.isEmpty()) { // In case there are no columns to index, bail @@ -899,74 +1122,85 @@ public static HoodieData convertMetadataToColumnStatsRecords(Hoodi } /** - * Get the latest columns for the table for column stats indexing. 
+ * Get the list of columns for the table for column stats indexing */ private static List getColumnsToIndex(MetadataRecordsGenerationParams recordsGenParams, - HoodieTableConfig tableConfig, - Option writerSchemaOpt) { - if (recordsGenParams.isAllColumnStatsIndexEnabled() && writerSchemaOpt.isPresent()) { - return writerSchemaOpt.get().getFields().stream() - .map(Schema.Field::name).collect(Collectors.toList()); - } - - // In case no writer schema could be obtained we fall back to only index primary key - // columns - return Arrays.asList(tableConfig.getRecordKeyFields().get()); - } + Lazy> lazyWriterSchemaOpt) { + checkState(recordsGenParams.isColumnStatsIndexEnabled()); - public static HoodieMetadataColumnStats mergeColumnStats(HoodieMetadataColumnStats oldColumnStats, HoodieMetadataColumnStats newColumnStats) { - ValidationUtils.checkArgument(oldColumnStats.getFileName().equals(newColumnStats.getFileName())); - if (newColumnStats.getIsDeleted()) { - return newColumnStats; + List targetColumns = recordsGenParams.getTargetColumnsForColumnStatsIndex(); + if (!targetColumns.isEmpty()) { + return targetColumns; } - return HoodieMetadataColumnStats.newBuilder() - .setFileName(newColumnStats.getFileName()) - .setMinValue(Stream.of(oldColumnStats.getMinValue(), newColumnStats.getMinValue()).filter(Objects::nonNull).min(Comparator.naturalOrder()).orElse(null)) - .setMaxValue(Stream.of(oldColumnStats.getMinValue(), newColumnStats.getMinValue()).filter(Objects::nonNull).max(Comparator.naturalOrder()).orElse(null)) - .setValueCount(oldColumnStats.getValueCount() + newColumnStats.getValueCount()) - .setNullCount(oldColumnStats.getNullCount() + newColumnStats.getNullCount()) - .setTotalSize(oldColumnStats.getTotalSize() + newColumnStats.getTotalSize()) - .setTotalUncompressedSize(oldColumnStats.getTotalUncompressedSize() + newColumnStats.getTotalUncompressedSize()) - .setIsDeleted(newColumnStats.getIsDeleted()) - .build(); + + Option writerSchemaOpt = 
lazyWriterSchemaOpt.get(); + return writerSchemaOpt + .map(writerSchema -> + writerSchema.getFields().stream() + .map(Schema.Field::name) + .collect(Collectors.toList())) + .orElse(Collections.emptyList()); } - public static Stream translateWriteStatToColumnStats(HoodieWriteStat writeStat, + private static Stream translateWriteStatToColumnStats(HoodieWriteStat writeStat, HoodieTableMetaClient datasetMetaClient, List columnsToIndex) { - if (writeStat instanceof HoodieDeltaWriteStat && ((HoodieDeltaWriteStat) writeStat).getRecordsStats().isPresent()) { - Map> columnRangeMap = ((HoodieDeltaWriteStat) writeStat).getRecordsStats().get().getStats(); - List> columnRangeMetadataList = new ArrayList<>(columnRangeMap.values()); + if (writeStat instanceof HoodieDeltaWriteStat && ((HoodieDeltaWriteStat) writeStat).getColumnStats().isPresent()) { + Map> columnRangeMap = ((HoodieDeltaWriteStat) writeStat).getColumnStats().get(); + Collection> columnRangeMetadataList = columnRangeMap.values(); return HoodieMetadataPayload.createColumnStatsRecords(writeStat.getPartitionPath(), columnRangeMetadataList, false); } - return getColumnStats(writeStat.getPartitionPath(), writeStat.getPath(), datasetMetaClient, columnsToIndex, false); + + return getColumnStatsRecords(writeStat.getPartitionPath(), writeStat.getPath(), datasetMetaClient, columnsToIndex, false); } - private static Stream getColumnStats(final String partitionPath, final String filePathWithPartition, - HoodieTableMetaClient datasetMetaClient, - List columnsToIndex, - boolean isDeleted) { - final String partition = getPartition(partitionPath); - final int offset = partition.equals(NON_PARTITIONED_NAME) ? (filePathWithPartition.startsWith("/") ? 
1 : 0) - : partition.length() + 1; - final String fileName = filePathWithPartition.substring(offset); - - if (filePathWithPartition.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { - final Path fullFilePath = new Path(datasetMetaClient.getBasePath(), filePathWithPartition); - List> columnRangeMetadataList; - if (!isDeleted) { - columnRangeMetadataList = new ParquetUtils().readRangeFromParquetMetadata( - datasetMetaClient.getHadoopConf(), fullFilePath, columnsToIndex); - } else { - // TODO we should delete records instead of stubbing them - columnRangeMetadataList = - columnsToIndex.stream().map(entry -> new HoodieColumnRangeMetadata(fileName, - entry, null, null, 0, 0, 0, 0)) - .collect(Collectors.toList()); + private static Stream getColumnStatsRecords(String partitionPath, + String filePath, + HoodieTableMetaClient datasetMetaClient, + List columnsToIndex, + boolean isDeleted) { + String partitionName = getPartitionIdentifier(partitionPath); + // NOTE: We have to chop leading "/" to make sure Hadoop does not treat it like + // absolute path + String filePartitionPath = filePath.startsWith("/") ? filePath.substring(1) : filePath; + String fileName = partitionName.equals(NON_PARTITIONED_NAME) + ? 
filePartitionPath + : filePartitionPath.substring(partitionName.length() + 1); + + if (isDeleted) { + // TODO we should delete records instead of stubbing them + List> columnRangeMetadataList = columnsToIndex.stream() + .map(entry -> HoodieColumnRangeMetadata.stub(fileName, entry)) + .collect(Collectors.toList()); + + return HoodieMetadataPayload.createColumnStatsRecords(partitionPath, columnRangeMetadataList, true); + } + + List> columnRangeMetadata = + readColumnRangeMetadataFrom(filePartitionPath, datasetMetaClient, columnsToIndex); + + return HoodieMetadataPayload.createColumnStatsRecords(partitionPath, columnRangeMetadata, false); + } + + private static List> readColumnRangeMetadataFrom(String filePath, + HoodieTableMetaClient datasetMetaClient, + List columnsToIndex) { + try { + if (filePath.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { + Path fullFilePath = new Path(datasetMetaClient.getBasePath(), filePath); + List> columnRangeMetadataList = + new ParquetUtils().readRangeFromParquetMetadata(datasetMetaClient.getHadoopConf(), fullFilePath, columnsToIndex); + + return columnRangeMetadataList; } - return HoodieMetadataPayload.createColumnStatsRecords(partitionPath, columnRangeMetadataList, isDeleted); - } else { - throw new HoodieException("Column range index not supported for filePathWithPartition " + fileName); + + LOG.warn("Column range index not supported for: " + filePath); + return Collections.emptyList(); + } catch (Exception e) { + // NOTE: In case reading column range metadata from individual file failed, + // we simply fall back, in lieu of failing the whole task + LOG.error("Failed to fetch column range metadata for: " + filePath); + return Collections.emptyList(); } } @@ -1001,72 +1235,37 @@ public static int getPartitionFileGroupCount(final MetadataPartitionType partiti } /** - * Accumulates column range metadata for the given field and updates the column range map. 
- * - * @param field - column for which statistics will be computed - * @param filePath - data file path - * @param columnRangeMap - old column range statistics, which will be merged in this computation - * @param columnToStats - map of column to map of each stat and its value - */ - public static void accumulateColumnRanges(Schema.Field field, String filePath, - Map> columnRangeMap, - Map> columnToStats) { - Map columnStats = columnToStats.get(field.name()); - HoodieColumnRangeMetadata columnRangeMetadata = new HoodieColumnRangeMetadata<>( - filePath, - field.name(), - String.valueOf(columnStats.get(MIN)), - String.valueOf(columnStats.get(MAX)), - Long.parseLong(columnStats.getOrDefault(NULL_COUNT, 0).toString()), - Long.parseLong(columnStats.getOrDefault(VALUE_COUNT, 0).toString()), - Long.parseLong(columnStats.getOrDefault(TOTAL_SIZE, 0).toString()), - Long.parseLong(columnStats.getOrDefault(TOTAL_UNCOMPRESSED_SIZE, 0).toString()) - ); - columnRangeMap.merge(field.name(), columnRangeMetadata, COLUMN_RANGE_MERGE_FUNCTION); - } - - /** - * Aggregates column stats for each field. 
- * - * @param record - current record - * @param schema - write schema - * @param columnToStats - map of column to map of each stat and its value which gets updates in this method - * @param consistentLogicalTimestampEnabled - flag to deal with logical timestamp type when getting column value + * Does an upcast for {@link BigDecimal} instance to align it with scale/precision expected by + * the {@link org.apache.avro.LogicalTypes.Decimal} Avro logical type */ - public static void aggregateColumnStats(IndexedRecord record, Schema schema, - Map> columnToStats, - boolean consistentLogicalTimestampEnabled) { - if (!(record instanceof GenericRecord)) { - throw new HoodieIOException("Record is not a generic type to get column range metadata!"); + public static BigDecimal tryUpcastDecimal(BigDecimal value, final LogicalTypes.Decimal decimal) { + final int scale = decimal.getScale(); + final int valueScale = value.scale(); + + boolean scaleAdjusted = false; + if (valueScale != scale) { + try { + value = value.setScale(scale, RoundingMode.UNNECESSARY); + scaleAdjusted = true; + } catch (ArithmeticException aex) { + throw new AvroTypeException( + "Cannot encode decimal with scale " + valueScale + " as scale " + scale + " without rounding"); + } } - schema.getFields().forEach(field -> { - Map columnStats = columnToStats.getOrDefault(field.name(), new HashMap<>()); - final String fieldVal = getNestedFieldValAsString((GenericRecord) record, field.name(), true, consistentLogicalTimestampEnabled); - // update stats - final int fieldSize = fieldVal == null ? 
0 : fieldVal.length(); - columnStats.put(TOTAL_SIZE, Long.parseLong(columnStats.getOrDefault(TOTAL_SIZE, 0).toString()) + fieldSize); - columnStats.put(TOTAL_UNCOMPRESSED_SIZE, Long.parseLong(columnStats.getOrDefault(TOTAL_UNCOMPRESSED_SIZE, 0).toString()) + fieldSize); - - if (!isNullOrEmpty(fieldVal)) { - // set the min value of the field - if (!columnStats.containsKey(MIN)) { - columnStats.put(MIN, fieldVal); - } - if (fieldVal.compareTo(String.valueOf(columnStats.get(MIN))) < 0) { - columnStats.put(MIN, fieldVal); - } - // set the max value of the field - if (fieldVal.compareTo(String.valueOf(columnStats.getOrDefault(MAX, ""))) > 0) { - columnStats.put(MAX, fieldVal); - } - // increment non-null value count - columnStats.put(VALUE_COUNT, Long.parseLong(columnStats.getOrDefault(VALUE_COUNT, 0).toString()) + 1); + int precision = decimal.getPrecision(); + int valuePrecision = value.precision(); + if (valuePrecision > precision) { + if (scaleAdjusted) { + throw new AvroTypeException("Cannot encode decimal with precision " + valuePrecision + " as max precision " + + precision + ". 
This is after safely adjusting scale from " + valueScale + " to required " + scale); } else { - // increment null value count - columnStats.put(NULL_COUNT, Long.parseLong(columnStats.getOrDefault(NULL_COUNT, 0).toString()) + 1); + throw new AvroTypeException( + "Cannot encode decimal with precision " + valuePrecision + " as max precision " + precision); } - }); + } + + return value; } private static Option tryResolveSchemaForTable(HoodieTableMetaClient dataTableMetaClient) { @@ -1074,11 +1273,111 @@ private static Option tryResolveSchemaForTable(HoodieTableMetaClient dat return Option.empty(); } - TableSchemaResolver schemaResolver = new TableSchemaResolver(dataTableMetaClient); try { + TableSchemaResolver schemaResolver = new TableSchemaResolver(dataTableMetaClient); return Option.of(schemaResolver.getTableAvroSchema()); } catch (Exception e) { throw new HoodieException("Failed to get latest columns for " + dataTableMetaClient.getBasePath(), e); } } + + /** + * Given a schema, coerces provided value to instance of {@link Comparable} such that + * it could subsequently used in column stats + * + * NOTE: This method has to stay compatible with the semantic of + * {@link ParquetUtils#readRangeFromParquetMetadata} as they are used in tandem + */ + private static Comparable coerceToComparable(Schema schema, Object val) { + if (val == null) { + return null; + } + + switch (schema.getType()) { + case UNION: + // TODO we need to handle unions in general case as well + return coerceToComparable(resolveNullableSchema(schema), val); + + case FIXED: + case BYTES: + if (schema.getLogicalType() instanceof LogicalTypes.Decimal) { + return (Comparable) val; + } + return (ByteBuffer) val; + + + case INT: + if (schema.getLogicalType() == LogicalTypes.date() + || schema.getLogicalType() == LogicalTypes.timeMillis()) { + // NOTE: This type will be either {@code java.sql.Date} or {org.joda.LocalDate} + // depending on the Avro version. 
Hence, we simply cast it to {@code Comparable} + return (Comparable) val; + } + return (Integer) val; + + case LONG: + if (schema.getLogicalType() == LogicalTypes.timeMicros() + || schema.getLogicalType() == LogicalTypes.timestampMicros() + || schema.getLogicalType() == LogicalTypes.timestampMillis()) { + // NOTE: This type will be either {@code java.sql.Date} or {org.joda.LocalDate} + // depending on the Avro version. Hence, we simply cast it to {@code Comparable} + return (Comparable) val; + } + return (Long) val; + + case STRING: + case FLOAT: + case DOUBLE: + case BOOLEAN: + return (Comparable) val; + + + // TODO add support for those types + case ENUM: + case MAP: + case NULL: + case RECORD: + case ARRAY: + return null; + + default: + throw new IllegalStateException("Unexpected type: " + schema.getType()); + } + } + + private static boolean canCompare(Schema schema) { + return schema.getType() != Schema.Type.MAP; + } + + public static Set getInflightMetadataPartitions(HoodieTableConfig tableConfig) { + return new HashSet<>(tableConfig.getMetadataPartitionsInflight()); + } + + public static Set getCompletedMetadataPartitions(HoodieTableConfig tableConfig) { + return new HashSet<>(tableConfig.getMetadataPartitions()); + } + + public static Set getInflightAndCompletedMetadataPartitions(HoodieTableConfig tableConfig) { + Set inflightAndCompletedPartitions = getInflightMetadataPartitions(tableConfig); + inflightAndCompletedPartitions.addAll(getCompletedMetadataPartitions(tableConfig)); + return inflightAndCompletedPartitions; + } + + /** + * Get Last commit's Metadata. 
+ */ + public static Option getLatestCommitMetadata(HoodieTableMetaClient metaClient) { + try { + HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); + if (timeline.lastInstant().isPresent()) { + HoodieInstant instant = timeline.lastInstant().get(); + byte[] data = timeline.getInstantDetails(instant).get(); + return Option.of(HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class)); + } else { + return Option.empty(); + } + } catch (Exception e) { + throw new HoodieException("Failed to get commit metadata", e); + } + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java index 9fb268e7de1b0..85505c025bcdd 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java @@ -31,6 +31,8 @@ public enum MetadataPartitionType { // FileId prefix used for all file groups in this partition. private final String fileIdPrefix; // Total file groups + // TODO fix: enum should not have any mutable aspect as this compromises whole idea + // of the inum being static, immutable entity private int fileGroupCount = 1; MetadataPartitionType(final String partitionPath, final String fileIdPrefix) { diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataRecordsGenerationParams.java b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataRecordsGenerationParams.java index 21d5b173b2338..72a8bf4cd26f8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataRecordsGenerationParams.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataRecordsGenerationParams.java @@ -26,24 +26,33 @@ /** * Encapsulates all parameters required to generate metadata index for enabled index types. 
+ * + * @deprecated this component currently duplicates configuration coming from the {@code HoodieWriteConfig} + * which is problematic; instead we should break this component down and use source of truth + * for each respective data-point directly ({@code HoodieWriteConfig}, {@code HoodieTableMetaClient}, etc) */ +@Deprecated public class MetadataRecordsGenerationParams implements Serializable { private final HoodieTableMetaClient dataMetaClient; private final List enabledPartitionTypes; private final String bloomFilterType; private final int bloomIndexParallelism; - private final boolean isAllColumnStatsIndexEnabled; + private final boolean isColumnStatsIndexEnabled; private final int columnStatsIndexParallelism; + private final List targetColumnsForColumnStatsIndex; + private final List targetColumnsForBloomFilterIndex; MetadataRecordsGenerationParams(HoodieTableMetaClient dataMetaClient, List enabledPartitionTypes, String bloomFilterType, int bloomIndexParallelism, - boolean isAllColumnStatsIndexEnabled, int columnStatsIndexParallelism) { + boolean isColumnStatsIndexEnabled, int columnStatsIndexParallelism, List targetColumnsForColumnStatsIndex, List targetColumnsForBloomFilterIndex) { this.dataMetaClient = dataMetaClient; this.enabledPartitionTypes = enabledPartitionTypes; this.bloomFilterType = bloomFilterType; this.bloomIndexParallelism = bloomIndexParallelism; - this.isAllColumnStatsIndexEnabled = isAllColumnStatsIndexEnabled; + this.isColumnStatsIndexEnabled = isColumnStatsIndexEnabled; this.columnStatsIndexParallelism = columnStatsIndexParallelism; + this.targetColumnsForColumnStatsIndex = targetColumnsForColumnStatsIndex; + this.targetColumnsForBloomFilterIndex = targetColumnsForBloomFilterIndex; } public HoodieTableMetaClient getDataMetaClient() { @@ -58,8 +67,8 @@ public String getBloomFilterType() { return bloomFilterType; } - public boolean isAllColumnStatsIndexEnabled() { - return isAllColumnStatsIndexEnabled; + public boolean 
isColumnStatsIndexEnabled() { + return isColumnStatsIndexEnabled; } public int getBloomIndexParallelism() { @@ -69,4 +78,12 @@ public int getBloomIndexParallelism() { public int getColumnStatsIndexParallelism() { return columnStatsIndexParallelism; } + + public List getTargetColumnsForColumnStatsIndex() { + return targetColumnsForColumnStatsIndex; + } + + public List getSecondaryKeysForBloomFilterIndex() { + return targetColumnsForBloomFilterIndex; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/util/Lazy.java b/hudi-common/src/main/java/org/apache/hudi/util/Lazy.java new file mode 100644 index 0000000000000..106969b70ff6c --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/util/Lazy.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.util; + +import java.util.function.Supplier; + +/** + * Utility implementing lazy semantics in Java + * + * @param type of the object being held by {@link Lazy} + */ +public class Lazy { + + private volatile boolean initialized; + + private Supplier initializer; + private T ref; + + private Lazy(Supplier initializer) { + this.initializer = initializer; + this.ref = null; + this.initialized = false; + } + + private Lazy(T ref) { + this.initializer = null; + this.ref = ref; + this.initialized = true; + } + + public T get() { + if (!initialized) { + synchronized (this) { + if (!initialized) { + this.ref = initializer.get(); + this.initializer = null; + initialized = true; + } + } + } + + return ref; + } + + /** + * Executes provided {@code initializer} lazily, while providing for "exactly once" semantic, + * to instantiate value of type {@link T} being subsequently held by the returned instance of + * {@link Lazy} + */ + public static Lazy lazily(Supplier initializer) { + return new Lazy<>(initializer); + } + + /** + * Instantiates {@link Lazy} in an "eagerly" fashion setting it w/ the provided value of + * type {@link T} directly, bypassing lazy initialization sequence + */ + public static Lazy eagerly(T ref) { + return new Lazy<>(ref); + } +} diff --git a/hudi-common/src/main/resources/hbase-site.xml b/hudi-common/src/main/resources/hbase-site.xml new file mode 100644 index 0000000000000..ad680e6b8999e --- /dev/null +++ b/hudi-common/src/main/resources/hbase-site.xml @@ -0,0 +1,2185 @@ + + + + + + + + + + + + hbase.tmp.dir + ${java.io.tmpdir}/hbase-${user.name} + Temporary directory on the local filesystem. + Change this setting to point to a location more permanent + than '/tmp', the usual resolve for java.io.tmpdir, as the + '/tmp' directory is cleared on machine restart. + + + + hbase.rootdir + ${hbase.tmp.dir}/hbase + The directory shared by region servers and into + which HBase persists. 
The URL should be 'fully-qualified' + to include the filesystem scheme. For example, to specify the + HDFS directory '/hbase' where the HDFS instance's namenode is + running at namenode.example.org on port 9000, set this value to: + hdfs://namenode.example.org:9000/hbase. By default, we write + to whatever ${hbase.tmp.dir} is set too -- usually /tmp -- + so change this configuration or else all data will be lost on + machine restart. + + + + hbase.cluster.distributed + false + The mode the cluster will be in. Possible values are + false for standalone mode and true for distributed mode. If + false, startup will run all HBase and ZooKeeper daemons together + in the one JVM. + + + + hbase.zookeeper.quorum + + 127.0.0.1 + Comma separated list of servers in the ZooKeeper ensemble + (This config. should have been named hbase.zookeeper.ensemble). + For example, "host1.mydomain.com,host2.mydomain.com,host3.mydomain.com". + By default this is set to localhost for local and pseudo-distributed modes + of operation. For a fully-distributed setup, this should be set to a full + list of ZooKeeper ensemble servers. If HBASE_MANAGES_ZK is set in hbase-env.sh + this is the list of servers which hbase will start/stop ZooKeeper on as + part of cluster start/stop. Client-side, we will take this list of + ensemble members and put it together with the hbase.zookeeper.property.clientPort + config. and pass it into zookeeper constructor as the connectString + parameter. + + + + + + zookeeper.recovery.retry.maxsleeptime + 60000 + Max sleep time before retry zookeeper operations in milliseconds, + a max time is needed here so that sleep time won't grow unboundedly + + + + hbase.local.dir + ${hbase.tmp.dir}/local/ + Directory on the local filesystem to be used + as a local storage. + + + + + + hbase.master.port + 16000 + The port the HBase Master should bind to. + + + hbase.master.info.port + 16010 + The port for the HBase Master web UI. + Set to -1 if you do not want a UI instance run. 
+ + + + hbase.master.info.bindAddress + 0.0.0.0 + The bind address for the HBase Master web UI + + + + hbase.master.logcleaner.plugins + + org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner,org.apache.hadoop.hbase.master.cleaner.TimeToLiveProcedureWALCleaner,org.apache.hadoop.hbase.master.cleaner.TimeToLiveMasterLocalStoreWALCleaner + + A comma-separated list of BaseLogCleanerDelegate invoked by + the LogsCleaner service. These WAL cleaners are called in order, + so put the cleaner that prunes the most files in front. To + implement your own BaseLogCleanerDelegate, just put it in HBase's classpath + and add the fully qualified class name here. Always add the above + default log cleaners in the list. + + + + hbase.master.logcleaner.ttl + 600000 + How long a WAL remain in the archive ({hbase.rootdir}/oldWALs) directory, + after which it will be cleaned by a Master thread. The value is in milliseconds. + + + + hbase.master.hfilecleaner.plugins + + org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner,org.apache.hadoop.hbase.master.cleaner.TimeToLiveMasterLocalStoreHFileCleaner + + A comma-separated list of BaseHFileCleanerDelegate invoked by + the HFileCleaner service. These HFiles cleaners are called in order, + so put the cleaner that prunes the most files in front. To + implement your own BaseHFileCleanerDelegate, just put it in HBase's classpath + and add the fully qualified class name here. Always add the above + default hfile cleaners in the list as they will be overwritten in + hbase-site.xml. + + + + hbase.master.infoserver.redirect + true + Whether or not the Master listens to the Master web + UI port (hbase.master.info.port) and redirects requests to the web + UI server shared by the Master and RegionServer. Config. makes + sense when Master is serving Regions (not the default). + + + + hbase.master.fileSplitTimeout + 600000 + Splitting a region, how long to wait on the file-splitting + step before aborting the attempt. Default: 600000. 
This setting used + to be known as hbase.regionserver.fileSplitTimeout in hbase-1.x. + Split is now run master-side hence the rename (If a + 'hbase.master.fileSplitTimeout' setting found, will use it to + prime the current 'hbase.master.fileSplitTimeout' + Configuration. + + + + + + hbase.regionserver.port + 16020 + The port the HBase RegionServer binds to. + + + hbase.regionserver.info.port + 16030 + The port for the HBase RegionServer web UI + Set to -1 if you do not want the RegionServer UI to run. + + + + hbase.regionserver.info.bindAddress + 0.0.0.0 + The address for the HBase RegionServer web UI + + + hbase.regionserver.info.port.auto + false + Whether or not the Master or RegionServer + UI should search for a port to bind to. Enables automatic port + search if hbase.regionserver.info.port is already in use. + Useful for testing, turned off by default. + + + + hbase.regionserver.handler.count + 30 + Count of RPC Listener instances spun up on RegionServers. + Same property is used by the Master for count of master handlers. + Too many handlers can be counter-productive. Make it a multiple of + CPU count. If mostly read-only, handlers count close to cpu count + does well. Start with twice the CPU count and tune from there. + + + + hbase.ipc.server.callqueue.handler.factor + 0.1 + Factor to determine the number of call queues. + A value of 0 means a single queue shared between all the handlers. + A value of 1 means that each handler has its own queue. + + + + hbase.ipc.server.callqueue.read.ratio + 0 + Split the call queues into read and write queues. + The specified interval (which should be between 0.0 and 1.0) + will be multiplied by the number of call queues. + A value of 0 indicate to not split the call queues, meaning that both read and write + requests will be pushed to the same set of queues. + A value lower than 0.5 means that there will be less read queues than write queues. + A value of 0.5 means there will be the same number of read and write queues. 
+ A value greater than 0.5 means that there will be more read queues than write queues. + A value of 1.0 means that all the queues except one are used to dispatch read requests. + + Example: Given the total number of call queues being 10 + a read.ratio of 0 means that: the 10 queues will contain both read/write requests. + a read.ratio of 0.3 means that: 3 queues will contain only read requests + and 7 queues will contain only write requests. + a read.ratio of 0.5 means that: 5 queues will contain only read requests + and 5 queues will contain only write requests. + a read.ratio of 0.8 means that: 8 queues will contain only read requests + and 2 queues will contain only write requests. + a read.ratio of 1 means that: 9 queues will contain only read requests + and 1 queues will contain only write requests. + + + + hbase.ipc.server.callqueue.scan.ratio + 0 + Given the number of read call queues, calculated from the total number + of call queues multiplied by the callqueue.read.ratio, the scan.ratio property + will split the read call queues into small-read and long-read queues. + A value lower than 0.5 means that there will be less long-read queues than short-read queues. + A value of 0.5 means that there will be the same number of short-read and long-read queues. + A value greater than 0.5 means that there will be more long-read queues than short-read queues + A value of 0 or 1 indicate to use the same set of queues for gets and scans. + + Example: Given the total number of read call queues being 8 + a scan.ratio of 0 or 1 means that: 8 queues will contain both long and short read requests. + a scan.ratio of 0.3 means that: 2 queues will contain only long-read requests + and 6 queues will contain only short-read requests. + a scan.ratio of 0.5 means that: 4 queues will contain only long-read requests + and 4 queues will contain only short-read requests. 
+ a scan.ratio of 0.8 means that: 6 queues will contain only long-read requests + and 2 queues will contain only short-read requests. + + + + hbase.regionserver.msginterval + 3000 + Interval between messages from the RegionServer to Master + in milliseconds. + + + + hbase.regionserver.logroll.period + 3600000 + Period at which we will roll the commit log regardless + of how many edits it has. + + + + hbase.regionserver.logroll.errors.tolerated + 2 + The number of consecutive WAL close errors we will allow + before triggering a server abort. A setting of 0 will cause the + region server to abort if closing the current WAL writer fails during + log rolling. Even a small value (2 or 3) will allow a region server + to ride over transient HDFS errors. + + + + hbase.regionserver.hlog.reader.impl + org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader + The WAL file reader implementation. + + + hbase.regionserver.hlog.writer.impl + org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter + The WAL file writer implementation. + + + hbase.regionserver.global.memstore.size + + Maximum size of all memstores in a region server before new + updates are blocked and flushes are forced. Defaults to 40% of heap (0.4). + Updates are blocked and flushes are forced until size of all memstores + in a region server hits hbase.regionserver.global.memstore.size.lower.limit. + The default value in this configuration has been intentionally left empty in order to + honor the old hbase.regionserver.global.memstore.upperLimit property if present. + + + + hbase.regionserver.global.memstore.size.lower.limit + + Maximum size of all memstores in a region server before flushes + are forced. Defaults to 95% of hbase.regionserver.global.memstore.size + (0.95). A 100% value for this value causes the minimum possible flushing + to occur when updates are blocked due to memstore limiting. 
The default + value in this configuration has been intentionally left empty in order to + honor the old hbase.regionserver.global.memstore.lowerLimit property if + present. + + + + hbase.systemtables.compacting.memstore.type + NONE + Determines the type of memstore to be used for system tables like + META, namespace tables etc. By default NONE is the type and hence we use the + default memstore for all the system tables. If we need to use compacting + memstore for system tables then set this property to BASIC/EAGER + + + + hbase.regionserver.optionalcacheflushinterval + 3600000 + + Maximum amount of time an edit lives in memory before being automatically flushed. + Default 1 hour. Set it to 0 to disable automatic flushing. + + + + hbase.regionserver.dns.interface + default + The name of the Network Interface from which a region server + should report its IP address. + + + + hbase.regionserver.dns.nameserver + default + The host name or IP address of the name server (DNS) + which a region server should use to determine the host name used by the + master for communication and display purposes. + + + + hbase.regionserver.region.split.policy + org.apache.hadoop.hbase.regionserver.SteppingSplitPolicy + + A split policy determines when a region should be split. The various + other split policies that are available currently are BusyRegionSplitPolicy, + ConstantSizeRegionSplitPolicy, DisabledRegionSplitPolicy, + DelimitedKeyPrefixRegionSplitPolicy, KeyPrefixRegionSplitPolicy, and + SteppingSplitPolicy. DisabledRegionSplitPolicy blocks manual region splitting. + + + + hbase.regionserver.regionSplitLimit + 1000 + + Limit for the number of regions after which no more region splitting + should take place. This is not hard limit for the number of regions + but acts as a guideline for the regionserver to stop splitting after + a certain limit. Default is set to 1000. + + + + + + zookeeper.session.timeout + 90000 + ZooKeeper session timeout in milliseconds. 
It is used in two different ways. + First, this value is used in the ZK client that HBase uses to connect to the ensemble. + It is also used by HBase when it starts a ZK server and it is passed as the 'maxSessionTimeout'. + See https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#ch_zkSessions. + For example, if an HBase region server connects to a ZK ensemble that's also managed + by HBase, then the session timeout will be the one specified by this configuration. + But, a region server that connects to an ensemble managed with a different configuration + will be subjected that ensemble's maxSessionTimeout. So, even though HBase might propose + using 90 seconds, the ensemble can have a max timeout lower than this and it will take + precedence. The current default maxSessionTimeout that ZK ships with is 40 seconds, which is lower than + HBase's. + + + + zookeeper.znode.parent + /hbase + Root ZNode for HBase in ZooKeeper. All of HBase's ZooKeeper + files that are configured with a relative path will go under this node. + By default, all of HBase's ZooKeeper file paths are configured with a + relative path, so they will all go under this directory unless changed. + + + + zookeeper.znode.acl.parent + acl + Root ZNode for access control lists. + + + hbase.zookeeper.dns.interface + default + The name of the Network Interface from which a ZooKeeper server + should report its IP address. + + + + hbase.zookeeper.dns.nameserver + default + The host name or IP address of the name server (DNS) + which a ZooKeeper server should use to determine the host name used by the + master for communication and display purposes. + + + + + hbase.zookeeper.peerport + 2888 + Port used by ZooKeeper peers to talk to each other. + See https://zookeeper.apache.org/doc/r3.3.3/zookeeperStarted.html#sc_RunningReplicatedZooKeeper + for more information. + + + + hbase.zookeeper.leaderport + 3888 + Port used by ZooKeeper for leader election. 
+ See https://zookeeper.apache.org/doc/r3.3.3/zookeeperStarted.html#sc_RunningReplicatedZooKeeper + for more information. + + + + + + + hbase.zookeeper.property.initLimit + 10 + Property from ZooKeeper's config zoo.cfg. + The number of ticks that the initial synchronization phase can take. + + + + hbase.zookeeper.property.syncLimit + 5 + Property from ZooKeeper's config zoo.cfg. + The number of ticks that can pass between sending a request and getting an + acknowledgment. + + + + hbase.zookeeper.property.dataDir + ${hbase.tmp.dir}/zookeeper + Property from ZooKeeper's config zoo.cfg. + The directory where the snapshot is stored. + + + + hbase.zookeeper.property.clientPort + 2181 + Property from ZooKeeper's config zoo.cfg. + The port at which the clients will connect. + + + + hbase.zookeeper.property.maxClientCnxns + 300 + Property from ZooKeeper's config zoo.cfg. + Limit on number of concurrent connections (at the socket level) that a + single client, identified by IP address, may make to a single member of + the ZooKeeper ensemble. Set high to avoid zk connection issues running + standalone and pseudo-distributed. + + + + + + + hbase.client.write.buffer + 2097152 + Default size of the BufferedMutator write buffer in bytes. + A bigger buffer takes more memory -- on both the client and server + side since server instantiates the passed write buffer to process + it -- but a larger buffer size reduces the number of RPCs made. + For an estimate of server-side memory-used, evaluate + hbase.client.write.buffer * hbase.regionserver.handler.count + + + + hbase.client.pause + 100 + General client pause value. Used mostly as value to wait + before running a retry of a failed get, region lookup, etc. + See hbase.client.retries.number for description of how we backoff from + this initial pause amount and how this pause works w/ retries. + + + + hbase.client.pause.cqtbe + + Whether or not to use a special client pause for + CallQueueTooBigException (cqtbe). 
Set this property to a higher value + than hbase.client.pause if you observe frequent CQTBE from the same + RegionServer and the call queue there keeps full + + + + hbase.client.retries.number + 15 + Maximum retries. Used as maximum for all retryable + operations such as the getting of a cell's value, starting a row update, + etc. Retry interval is a rough function based on hbase.client.pause. At + first we retry at this interval but then with backoff, we pretty quickly reach + retrying every ten seconds. See HConstants#RETRY_BACKOFF for how the backup + ramps up. Change this setting and hbase.client.pause to suit your workload. + + + + hbase.client.max.total.tasks + 100 + The maximum number of concurrent mutation tasks a single HTable instance will + send to the cluster. + + + + hbase.client.max.perserver.tasks + 2 + The maximum number of concurrent mutation tasks a single HTable instance will + send to a single region server. + + + + hbase.client.max.perregion.tasks + 1 + The maximum number of concurrent mutation tasks the client will + maintain to a single Region. That is, if there is already + hbase.client.max.perregion.tasks writes in progress for this region, new puts + won't be sent to this region until some writes finishes. + + + + hbase.client.perserver.requests.threshold + 2147483647 + The max number of concurrent pending requests for one server in all client threads + (process level). Exceeding requests will be thrown ServerTooBusyException immediately to prevent + user's threads being occupied and blocked by only one slow region server. If you use a fix + number of threads to access HBase in a synchronous way, set this to a suitable value which is + related to the number of threads will help you. See + https://issues.apache.org/jira/browse/HBASE-16388 for details. + + + + hbase.client.scanner.caching + 2147483647 + Number of rows that we try to fetch when calling next + on a scanner if it is not served from (local, client) memory. 
This configuration + works together with hbase.client.scanner.max.result.size to try and use the + network efficiently. The default value is Integer.MAX_VALUE by default so that + the network will fill the chunk size defined by hbase.client.scanner.max.result.size + rather than be limited by a particular number of rows since the size of rows varies + table to table. If you know ahead of time that you will not require more than a certain + number of rows from a scan, this configuration should be set to that row limit via + Scan#setCaching. Higher caching values will enable faster scanners but will eat up more + memory and some calls of next may take longer and longer times when the cache is empty. + Do not set this value such that the time between invocations is greater than the scanner + timeout; i.e. hbase.client.scanner.timeout.period + + + + hbase.client.keyvalue.maxsize + 10485760 + Specifies the combined maximum allowed size of a KeyValue + instance. This is to set an upper boundary for a single entry saved in a + storage file. Since they cannot be split it helps avoiding that a region + cannot be split any further because the data is too large. It seems wise + to set this to a fraction of the maximum region size. Setting it to zero + or less disables the check. + + + + hbase.server.keyvalue.maxsize + 10485760 + Maximum allowed size of an individual cell, inclusive of value and all key + components. A value of 0 or less disables the check. + The default value is 10MB. + This is a safety setting to protect the server from OOM situations. + + + + hbase.client.scanner.timeout.period + 60000 + Client scanner lease period in milliseconds. + + + hbase.client.localityCheck.threadPoolSize + 2 + + + + + hbase.bulkload.retries.number + 10 + Maximum retries. This is maximum number of iterations + to atomic bulk loads are attempted in the face of splitting operations + 0 means never give up. 
+ + + + hbase.master.balancer.maxRitPercent + 1.0 + The max percent of regions in transition when balancing. + The default value is 1.0. So there are no balancer throttling. If set this config to 0.01, + It means that there are at most 1% regions in transition when balancing. + Then the cluster's availability is at least 99% when balancing. + + + + hbase.balancer.period + + 300000 + Period at which the region balancer runs in the Master, in + milliseconds. + + + + hbase.regions.slop + 0.001 + Rebalance if any regionserver has average + (average * slop) regions. + The default value of this parameter is 0.001 in StochasticLoadBalancer (the default load + balancer), while the default is 0.2 in other load balancers (i.e., + SimpleLoadBalancer). + + + + hbase.normalizer.period + 300000 + Period at which the region normalizer runs in the Master, in + milliseconds. + + + + hbase.normalizer.split.enabled + true + Whether to split a region as part of normalization. + + + hbase.normalizer.merge.enabled + true + Whether to merge a region as part of normalization. + + + hbase.normalizer.min.region.count + 3 + The minimum number of regions in a table to consider it for merge + normalization. + + + + hbase.normalizer.merge.min_region_age.days + 3 + The minimum age for a region to be considered for a merge, in days. + + + hbase.normalizer.merge.min_region_age.days + 3 + The minimum age for a region to be considered for a merge, in days. + + + hbase.normalizer.merge.min_region_size.mb + 1 + The minimum size for a region to be considered for a merge, in whole + MBs. + + + + hbase.table.normalization.enabled + false + This config is used to set default behaviour of normalizer at table level. + To override this at table level one can set NORMALIZATION_ENABLED at table descriptor level + and that property will be honored + + + + hbase.server.thread.wakefrequency + 10000 + Time to sleep in between searches for work (in milliseconds). 
+ Used as sleep interval by service threads such as log roller. + + + + hbase.server.versionfile.writeattempts + 3 + + How many times to retry attempting to write a version file + before just aborting. Each attempt is separated by the + hbase.server.thread.wakefrequency milliseconds. + + + + hbase.hregion.memstore.flush.size + 134217728 + + Memstore will be flushed to disk if size of the memstore + exceeds this number of bytes. Value is checked by a thread that runs + every hbase.server.thread.wakefrequency. + + + + hbase.hregion.percolumnfamilyflush.size.lower.bound.min + 16777216 + + If FlushLargeStoresPolicy is used and there are multiple column families, + then every time that we hit the total memstore limit, we find out all the + column families whose memstores exceed a "lower bound" and only flush them + while retaining the others in memory. The "lower bound" will be + "hbase.hregion.memstore.flush.size / column_family_number" by default + unless value of this property is larger than that. If none of the families + have their memstore size more than lower bound, all the memstores will be + flushed (just as usual). + + + + hbase.hregion.preclose.flush.size + 5242880 + + If the memstores in a region are this size or larger when we go + to close, run a "pre-flush" to clear out memstores before we put up + the region closed flag and take the region offline. On close, + a flush is run under the close flag to empty memory. During + this time the region is offline and we are not taking on any writes. + If the memstore content is large, this flush could take a long time to + complete. The preflush is meant to clean out the bulk of the memstore + before putting up the close flag and taking the region offline so the + flush that runs under the close flag has little to do. + + + + hbase.hregion.memstore.block.multiplier + 4 + + Block updates if memstore has hbase.hregion.memstore.block.multiplier + times hbase.hregion.memstore.flush.size bytes. 
Useful preventing + runaway memstore during spikes in update traffic. Without an + upper-bound, memstore fills such that when it flushes the + resultant flush files take a long time to compact or split, or + worse, we OOME. + + + + hbase.hregion.memstore.mslab.enabled + true + + Enables the MemStore-Local Allocation Buffer, + a feature which works to prevent heap fragmentation under + heavy write loads. This can reduce the frequency of stop-the-world + GC pauses on large heaps. + + + + hbase.hregion.memstore.mslab.chunksize + 2097152 + The maximum byte size of a chunk in the MemStoreLAB. Unit: bytes + + + hbase.regionserver.offheap.global.memstore.size + 0 + The amount of off-heap memory all MemStores in a RegionServer may use. + A value of 0 means that no off-heap memory will be used and all chunks in MSLAB + will be HeapByteBuffer, otherwise the non-zero value means how many megabyte of + off-heap memory will be used for chunks in MSLAB and all chunks in MSLAB will be + DirectByteBuffer. Unit: megabytes. + + + + hbase.hregion.memstore.mslab.max.allocation + 262144 + The maximal size of one allocation in the MemStoreLAB, if the desired byte + size exceed this threshold then it will be just allocated from JVM heap rather than MemStoreLAB. + + + + hbase.hregion.max.filesize + 10737418240 + + Maximum HFile size. If the sum of the sizes of a region's HFiles has grown to exceed this + value, the region is split in two. + + + + hbase.hregion.split.overallfiles + false + If we should sum overall region files size when check to split. + + + hbase.hregion.majorcompaction + 604800000 + Time between major compactions, expressed in milliseconds. Set to 0 to disable + time-based automatic major compactions. User-requested and size-based major compactions will + still run. This value is multiplied by hbase.hregion.majorcompaction.jitter to cause + compaction to start at a somewhat-random time during a given window of time. 
The default value + is 7 days, expressed in milliseconds. If major compactions are causing disruption in your + environment, you can configure them to run at off-peak times for your deployment, or disable + time-based major compactions by setting this parameter to 0, and run major compactions in a + cron job or by another external mechanism. + + + + hbase.hregion.majorcompaction.jitter + 0.50 + A multiplier applied to hbase.hregion.majorcompaction to cause compaction to occur + a given amount of time either side of hbase.hregion.majorcompaction. The smaller the number, + the closer the compactions will happen to the hbase.hregion.majorcompaction + interval. + + + + hbase.hstore.compactionThreshold + 3 + If more than this number of StoreFiles exist in any one Store + (one StoreFile is written per flush of MemStore), a compaction is run to rewrite all + StoreFiles into a single StoreFile. Larger values delay compaction, but when compaction does + occur, it takes longer to complete. + + + + hbase.regionserver.compaction.enabled + true + Enable/disable compactions on by setting true/false. + We can further switch compactions dynamically with the + compaction_switch shell command. + + + + hbase.hstore.flusher.count + 2 + The number of flush threads. With fewer threads, the MemStore flushes will be + queued. With more threads, the flushes will be executed in parallel, increasing the load on + HDFS, and potentially causing more compactions. + + + + hbase.hstore.blockingStoreFiles + 16 + If more than this number of StoreFiles exist in any one Store (one StoreFile + is written per flush of MemStore), updates are blocked for this region until a compaction is + completed, or until hbase.hstore.blockingWaitTime has been exceeded. + + + + hbase.hstore.blockingWaitTime + 90000 + The time for which a region will block updates after reaching the StoreFile limit + defined by hbase.hstore.blockingStoreFiles. 
After this time has elapsed, the region will stop + blocking updates even if a compaction has not been completed. + + + + hbase.hstore.compaction.min + + The minimum number of StoreFiles which must be eligible for compaction before + compaction can run. The goal of tuning hbase.hstore.compaction.min is to avoid ending up with + too many tiny StoreFiles to compact. Setting this value to 2 would cause a minor compaction + each time you have two StoreFiles in a Store, and this is probably not appropriate. If you + set this value too high, all the other values will need to be adjusted accordingly. For most + cases, the default value is appropriate (empty value here, results in 3 by code logic). In + previous versions of HBase, the parameter hbase.hstore.compaction.min was named + hbase.hstore.compactionThreshold. + + + + hbase.hstore.compaction.max + 10 + The maximum number of StoreFiles which will be selected for a single minor + compaction, regardless of the number of eligible StoreFiles. Effectively, the value of + hbase.hstore.compaction.max controls the length of time it takes a single compaction to + complete. Setting it larger means that more StoreFiles are included in a compaction. For most + cases, the default value is appropriate. + + + + hbase.hstore.compaction.min.size + 134217728 + A StoreFile (or a selection of StoreFiles, when using ExploringCompactionPolicy) + smaller than this size will always be eligible for minor compaction. + HFiles this size or larger are evaluated by hbase.hstore.compaction.ratio to determine if + they are eligible. Because this limit represents the "automatic include" limit for all + StoreFiles smaller than this value, this value may need to be reduced in write-heavy + environments where many StoreFiles in the 1-2 MB range are being flushed, because every + StoreFile will be targeted for compaction and the resulting StoreFiles may still be under the + minimum size and require further compaction. 
If this parameter is lowered, the ratio check is + triggered more quickly. This addressed some issues seen in earlier versions of HBase but + changing this parameter is no longer necessary in most situations. Default: 128 MB expressed + in bytes. + + + + hbase.hstore.compaction.max.size + 9223372036854775807 + A StoreFile (or a selection of StoreFiles, when using ExploringCompactionPolicy) + larger than this size will be excluded from compaction. The effect of + raising hbase.hstore.compaction.max.size is fewer, larger StoreFiles that do not get + compacted often. If you feel that compaction is happening too often without much benefit, you + can try raising this value. Default: the value of LONG.MAX_VALUE, expressed in bytes. + + + + hbase.hstore.compaction.ratio + 1.2F + For minor compaction, this ratio is used to determine whether a given StoreFile + which is larger than hbase.hstore.compaction.min.size is eligible for compaction. Its + effect is to limit compaction of large StoreFiles. The value of hbase.hstore.compaction.ratio + is expressed as a floating-point decimal. A large ratio, such as 10, will produce a single + giant StoreFile. Conversely, a low value, such as .25, will produce behavior similar to the + BigTable compaction algorithm, producing four StoreFiles. A moderate value of between 1.0 and + 1.4 is recommended. When tuning this value, you are balancing write costs with read costs. + Raising the value (to something like 1.4) will have more write costs, because you will + compact larger StoreFiles. However, during reads, HBase will need to seek through fewer + StoreFiles to accomplish the read. Consider this approach if you cannot take advantage of + Bloom filters. Otherwise, you can lower this value to something like 1.0 to reduce the + background cost of writes, and use Bloom filters to control the number of StoreFiles touched + during reads. For most cases, the default value is appropriate. 
+ + + + hbase.hstore.compaction.ratio.offpeak + 5.0F + Allows you to set a different (by default, more aggressive) ratio for determining + whether larger StoreFiles are included in compactions during off-peak hours. Works in the + same way as hbase.hstore.compaction.ratio. Only applies if hbase.offpeak.start.hour and + hbase.offpeak.end.hour are also enabled. + + + + hbase.hstore.time.to.purge.deletes + 0 + The amount of time to delay purging of delete markers with future timestamps. If + unset, or set to 0, all delete markers, including those with future timestamps, are purged + during the next major compaction. Otherwise, a delete marker is kept until the major compaction + which occurs after the marker's timestamp plus the value of this setting, in milliseconds. + + + + hbase.offpeak.start.hour + -1 + The start of off-peak hours, expressed as an integer between 0 and 23, inclusive. + Set to -1 to disable off-peak. + + + + hbase.offpeak.end.hour + -1 + The end of off-peak hours, expressed as an integer between 0 and 23, inclusive. Set + to -1 to disable off-peak. + + + + hbase.regionserver.thread.compaction.throttle + 2684354560 + There are two different thread pools for compactions, one for large compactions and + the other for small compactions. This helps to keep compaction of lean tables (such as + hbase:meta) fast. If a compaction is larger than this threshold, it + goes into the large compaction pool. In most cases, the default value is appropriate. Default: + 2 x hbase.hstore.compaction.max x hbase.hregion.memstore.flush.size (which defaults to 128MB). + The value field assumes that the value of hbase.hregion.memstore.flush.size is unchanged from + the default. + + + + hbase.regionserver.majorcompaction.pagecache.drop + true + Specifies whether to drop pages read/written into the system page cache by + major compactions. 
Setting it to true helps prevent major compactions from + polluting the page cache, which is almost always required, especially for clusters + with low/moderate memory to storage ratio. + + + + hbase.regionserver.minorcompaction.pagecache.drop + true + Specifies whether to drop pages read/written into the system page cache by + minor compactions. Setting it to true helps prevent minor compactions from + polluting the page cache, which is most beneficial on clusters with low + memory to storage ratio or very write heavy clusters. You may want to set it to + false under moderate to low write workload when bulk of the reads are + on the most recently written data. + + + + hbase.hstore.compaction.kv.max + 10 + The maximum number of KeyValues to read and then write in a batch when flushing or + compacting. Set this lower if you have big KeyValues and problems with Out Of Memory + Exceptions Set this higher if you have wide, small rows. + + + + hbase.storescanner.parallel.seek.enable + false + + Enables StoreFileScanner parallel-seeking in StoreScanner, + a feature which can reduce response latency under special conditions. + + + + hbase.storescanner.parallel.seek.threads + 10 + + The default thread pool size if parallel-seeking feature enabled. + + + + hfile.block.cache.policy + LRU + The eviction policy for the L1 block cache (LRU or TinyLFU). + + + hfile.block.cache.size + 0.4 + Percentage of maximum heap (-Xmx setting) to allocate to block cache + used by a StoreFile. Default of 0.4 means allocate 40%. + Set to 0 to disable but it's not recommended; you need at least + enough cache to hold the storefile indices. + + + + hfile.block.index.cacheonwrite + false + This allows to put non-root multi-level index blocks into the block + cache at the time the index is being written. 
+ + + + hfile.index.block.max.size + 131072 + When the size of a leaf-level, intermediate-level, or root-level + index block in a multi-level block index grows to this size, the + block is written out and a new block is started. + + + + hbase.bucketcache.ioengine + + Where to store the contents of the bucketcache. One of: offheap, + file, files, mmap or pmem. If a file or files, set it to file(s):PATH_TO_FILE. + mmap means the content will be in an mmaped file. Use mmap:PATH_TO_FILE. 'pmem' + is bucket cache over a file on the persistent memory device. + Use pmem:PATH_TO_FILE. + See http://hbase.apache.org/book.html#offheap.blockcache for more information. + + + + hbase.hstore.compaction.throughput.lower.bound + 52428800 + The target lower bound on aggregate compaction throughput, in bytes/sec. Allows + you to tune the minimum available compaction throughput when the + PressureAwareCompactionThroughputController throughput controller is active. (It is active by + default.) + + + + hbase.hstore.compaction.throughput.higher.bound + 104857600 + The target upper bound on aggregate compaction throughput, in bytes/sec. Allows + you to control aggregate compaction throughput demand when the + PressureAwareCompactionThroughputController throughput controller is active. (It is active by + default.) The maximum throughput will be tuned between the lower and upper bounds when + compaction pressure is within the range [0.0, 1.0]. If compaction pressure is 1.0 or greater + the higher bound will be ignored until pressure returns to the normal range. + + + + hbase.bucketcache.size + + A float that EITHER represents a percentage of total heap memory + size to give to the cache (if < 1.0) OR, it is the total capacity in + megabytes of BucketCache. Default: 0.0 + + + + hbase.bucketcache.bucket.sizes + + A comma-separated list of sizes for buckets for the bucketcache. + Can be multiple sizes. List block sizes in order from smallest to largest. 
+ The sizes you use will depend on your data access patterns. + Must be a multiple of 256 else you will run into + 'java.io.IOException: Invalid HFile block magic' when you go to read from cache. + If you specify no values here, then you pick up the default bucketsizes set + in code (See BucketAllocator#DEFAULT_BUCKET_SIZES). + + + + hfile.format.version + 3 + The HFile format version to use for new files. + Version 3 adds support for tags in hfiles (See http://hbase.apache.org/book.html#hbase.tags). + Also see the configuration 'hbase.replication.rpc.codec'. + + + + hfile.block.bloom.cacheonwrite + false + Enables cache-on-write for inline blocks of a compound Bloom filter. + + + io.storefile.bloom.block.size + 131072 + The size in bytes of a single block ("chunk") of a compound Bloom + filter. This size is approximate, because Bloom blocks can only be + inserted at data block boundaries, and the number of keys per data + block varies. + + + + hbase.rs.cacheblocksonwrite + false + Whether an HFile block should be added to the block cache when the + block is finished. + + + + hbase.rpc.timeout + 60000 + This is for the RPC layer to define how long (millisecond) HBase client applications + take for a remote call to time out. It uses pings to check connections + but will eventually throw a TimeoutException. + + + + hbase.client.operation.timeout + 1200000 + Operation timeout is a top-level restriction (millisecond) that makes sure a + blocking operation in Table will not be blocked more than this. In each operation, if rpc + request fails because of timeout or other reason, it will retry until success or throw + RetriesExhaustedException. But if the total time being blocking reach the operation timeout + before retries exhausted, it will break early and throw SocketTimeoutException. + + + + hbase.cells.scanned.per.heartbeat.check + 10000 + The number of cells scanned in between heartbeat checks. 
Heartbeat + checks occur during the processing of scans to determine whether or not the + server should stop scanning in order to send back a heartbeat message to the + client. Heartbeat messages are used to keep the client-server connection alive + during long running scans. Small values mean that the heartbeat checks will + occur more often and thus will provide a tighter bound on the execution time of + the scan. Larger values mean that the heartbeat checks occur less frequently + + + + hbase.rpc.shortoperation.timeout + 10000 + This is another version of "hbase.rpc.timeout". For those RPC operation + within cluster, we rely on this configuration to set a short timeout limitation + for short operation. For example, short rpc timeout for region server's trying + to report to active master can benefit quicker master failover process. + + + + hbase.ipc.client.tcpnodelay + true + Set no delay on rpc socket connections. See + http://docs.oracle.com/javase/1.5.0/docs/api/java/net/Socket.html#getTcpNoDelay() + + + + hbase.unsafe.regionserver.hostname + + This config is for experts: don't set its value unless you really know what you are doing. + When set to a non-empty value, this represents the (external facing) hostname for the underlying server. + See https://issues.apache.org/jira/browse/HBASE-12954 for details. + + + + hbase.unsafe.regionserver.hostname.disable.master.reversedns + false + This config is for experts: don't set its value unless you really know what you are doing. + When set to true, regionserver will use the current node hostname for the servername and HMaster will + skip reverse DNS lookup and use the hostname sent by regionserver instead. Note that this config and + hbase.unsafe.regionserver.hostname are mutually exclusive. See https://issues.apache.org/jira/browse/HBASE-18226 + for more details. + + + + + hbase.master.keytab.file + + Full path to the kerberos keytab file to use for logging in + the configured HMaster server principal. 
+ + + + hbase.master.kerberos.principal + + Ex. "hbase/_HOST@EXAMPLE.COM". The kerberos principal name + that should be used to run the HMaster process. The principal name should + be in the form: user/hostname@DOMAIN. If "_HOST" is used as the hostname + portion, it will be replaced with the actual hostname of the running + instance. + + + + hbase.regionserver.keytab.file + + Full path to the kerberos keytab file to use for logging in + the configured HRegionServer server principal. + + + + hbase.regionserver.kerberos.principal + + Ex. "hbase/_HOST@EXAMPLE.COM". The kerberos principal name + that should be used to run the HRegionServer process. The principal name + should be in the form: user/hostname@DOMAIN. If "_HOST" is used as the + hostname portion, it will be replaced with the actual hostname of the + running instance. An entry for this principal must exist in the file + specified in hbase.regionserver.keytab.file + + + + + hadoop.policy.file + hbase-policy.xml + The policy configuration file used by RPC servers to make + authorization decisions on client requests. Only used when HBase + security is enabled. + + + + hbase.superuser + + List of users or groups (comma-separated), who are allowed + full privileges, regardless of stored ACLs, across the cluster. + Only used when HBase security is enabled. + + + + hbase.auth.key.update.interval + 86400000 + The update interval for master key for authentication tokens + in servers in milliseconds. Only used when HBase security is enabled. + + + + hbase.auth.token.max.lifetime + 604800000 + The maximum lifetime in milliseconds after which an + authentication token expires. Only used when HBase security is enabled. + + + + hbase.ipc.client.fallback-to-simple-auth-allowed + false + When a client is configured to attempt a secure connection, but attempts to + connect to an insecure server, that server may instruct the client to + switch to SASL SIMPLE (unsecure) authentication. 
This setting controls + whether or not the client will accept this instruction from the server. + When false (the default), the client will not allow the fallback to SIMPLE + authentication, and will abort the connection. + + + + hbase.ipc.server.fallback-to-simple-auth-allowed + false + When a server is configured to require secure connections, it will + reject connection attempts from clients using SASL SIMPLE (unsecure) authentication. + This setting allows secure servers to accept SASL SIMPLE connections from clients + when the client requests. When false (the default), the server will not allow the fallback + to SIMPLE authentication, and will reject the connection. WARNING: This setting should ONLY + be used as a temporary measure while converting clients over to secure authentication. It + MUST BE DISABLED for secure operation. + + + + hbase.display.keys + true + When this is set to true the webUI and such will display all start/end keys + as part of the table details, region names, etc. When this is set to false, + the keys are hidden. + + + + hbase.coprocessor.enabled + true + Enables or disables coprocessor loading. If 'false' + (disabled), any other coprocessor related configuration will be ignored. + + + + hbase.coprocessor.user.enabled + true + Enables or disables user (aka. table) coprocessor loading. + If 'false' (disabled), any table coprocessor attributes in table + descriptors will be ignored. If "hbase.coprocessor.enabled" is 'false' + this setting has no effect. + + + + hbase.coprocessor.region.classes + + A comma-separated list of Coprocessors that are loaded by + default on all tables. For any override coprocessor method, these classes + will be called in order. After implementing your own Coprocessor, just put + it in HBase's classpath and add the fully qualified class name here. + A coprocessor can also be loaded on demand by setting HTableDescriptor. 
+ + + + hbase.coprocessor.master.classes + + A comma-separated list of + org.apache.hadoop.hbase.coprocessor.MasterObserver coprocessors that are + loaded by default on the active HMaster process. For any implemented + coprocessor methods, the listed classes will be called in order. After + implementing your own MasterObserver, just put it in HBase's classpath + and add the fully qualified class name here. + + + + hbase.coprocessor.abortonerror + true + Set to true to cause the hosting server (master or regionserver) + to abort if a coprocessor fails to load, fails to initialize, or throws an + unexpected Throwable object. Setting this to false will allow the server to + continue execution but the system wide state of the coprocessor in question + will become inconsistent as it will be properly executing in only a subset + of servers, so this is most useful for debugging only. + + + + hbase.rest.port + 8080 + The port for the HBase REST server. + + + hbase.rest.readonly + false + Defines the mode the REST server will be started in. Possible values are: + false: All HTTP methods are permitted - GET/PUT/POST/DELETE. + true: Only the GET method is permitted. + + + + hbase.rest.threads.max + 100 + The maximum number of threads of the REST server thread pool. + Threads in the pool are reused to process REST requests. This + controls the maximum number of requests processed concurrently. + It may help to control the memory used by the REST server to + avoid OOM issues. If the thread pool is full, incoming requests + will be queued up and wait for some free threads. + + + + hbase.rest.threads.min + 2 + The minimum number of threads of the REST server thread pool. + The thread pool always has at least these number of threads so + the REST server is ready to serve incoming requests. + + + + hbase.rest.support.proxyuser + false + Enables running the REST server to support proxy-user mode. 
+ + + hbase.defaults.for.version + 2.4.9 + This defaults file was compiled for version ${project.version}. This variable is used + to make sure that a user doesn't have an old version of hbase-default.xml on the + classpath. + + + + hbase.defaults.for.version.skip + false + Set to true to skip the 'hbase.defaults.for.version' check. + Setting this to true can be useful in contexts other than + the other side of a maven generation; i.e. running in an + IDE. You'll want to set this boolean to true to avoid + seeing the RuntimeException complaint: "hbase-default.xml file + seems to be for and old version of HBase (\${hbase.version}), this + version is X.X.X-SNAPSHOT" + + + + hbase.table.lock.enable + true + Set to true to enable locking the table in zookeeper for schema change operations. + Table locking from master prevents concurrent schema modifications to corrupt table + state. + + + + hbase.table.max.rowsize + 1073741824 + + Maximum size of single row in bytes (default is 1 Gb) for Get'ting + or Scan'ning without in-row scan flag set. If row size exceeds this limit + RowTooBigException is thrown to client. + + + + hbase.thrift.minWorkerThreads + 16 + The "core size" of the thread pool. New threads are created on every + connection until this many threads are created. + + + + hbase.thrift.maxWorkerThreads + 1000 + The maximum size of the thread pool. When the pending request queue + overflows, new threads are created until their number reaches this number. + After that, the server starts dropping connections. + + + + hbase.thrift.maxQueuedRequests + 1000 + The maximum number of pending Thrift connections waiting in the queue. If + there are no idle threads in the pool, the server queues requests. Only + when the queue overflows, new threads are added, up to + hbase.thrift.maxQueuedRequests threads. + + + + hbase.regionserver.thrift.framed + false + Use Thrift TFramedTransport on the server side. 
+ This is the recommended transport for thrift servers and requires a similar setting + on the client side. Changing this to false will select the default transport, + vulnerable to DoS when malformed requests are issued due to THRIFT-601. + + + + hbase.regionserver.thrift.framed.max_frame_size_in_mb + 2 + Default frame size when using framed transport, in MB + + + hbase.regionserver.thrift.compact + false + Use Thrift TCompactProtocol binary serialization protocol. + + + hbase.rootdir.perms + 700 + FS Permissions for the root data subdirectory in a secure (kerberos) setup. + When master starts, it creates the rootdir with this permissions or sets the permissions + if it does not match. + + + + hbase.wal.dir.perms + 700 + FS Permissions for the root WAL directory in a secure(kerberos) setup. + When master starts, it creates the WAL dir with this permissions or sets the permissions + if it does not match. + + + + hbase.data.umask.enable + false + Enable, if true, that file permissions should be assigned + to the files written by the regionserver + + + + hbase.data.umask + 000 + File permissions that should be used to write data + files when hbase.data.umask.enable is true + + + + hbase.snapshot.enabled + true + Set to true to allow snapshots to be taken / restored / cloned. + + + hbase.snapshot.restore.take.failsafe.snapshot + true + Set to true to take a snapshot before the restore operation. + The snapshot taken will be used in case of failure, to restore the previous state. + At the end of the restore operation this snapshot will be deleted + + + + hbase.snapshot.restore.failsafe.name + hbase-failsafe-{snapshot.name}-{restore.timestamp} + Name of the failsafe snapshot taken by the restore operation. + You can use the {snapshot.name}, {table.name} and {restore.timestamp} variables + to create a name based on what you are restoring. + + + + hbase.snapshot.working.dir + + Location where the snapshotting process will occur. 
The location of the + completed snapshots will not change, but the temporary directory where the snapshot + process occurs will be set to this location. This can be a separate filesystem than + the root directory, for performance increase purposes. See HBASE-21098 for more + information + + + + hbase.server.compactchecker.interval.multiplier + 1000 + The number that determines how often we scan to see if compaction is necessary. + Normally, compactions are done after some events (such as memstore flush), but if + region didn't receive a lot of writes for some time, or due to different compaction + policies, it may be necessary to check it periodically. The interval between checks is + hbase.server.compactchecker.interval.multiplier multiplied by + hbase.server.thread.wakefrequency. + + + + hbase.lease.recovery.timeout + 900000 + How long we wait on dfs lease recovery in total before giving up. + + + hbase.lease.recovery.dfs.timeout + 64000 + How long between dfs recover lease invocations. Should be larger than the sum of + the time it takes for the namenode to issue a block recovery command as part of + datanode; dfs.heartbeat.interval and the time it takes for the primary + datanode, performing block recovery to timeout on a dead datanode; usually + dfs.client.socket-timeout. See the end of HBASE-8389 for more. + + + + hbase.column.max.version + 1 + New column family descriptors will use this value as the default number of versions + to keep. + + + + dfs.client.read.shortcircuit + + + If set to true, this configuration parameter enables short-circuit local + reads. + + + + dfs.domain.socket.path + + + This is a path to a UNIX domain socket that will be used for + communication between the DataNode and local HDFS clients, if + dfs.client.read.shortcircuit is set to true. If the string "_PORT" is + present in this path, it will be replaced by the TCP port of the DataNode. 
+ Be careful about permissions for the directory that hosts the shared + domain socket; dfsclient will complain if open to other users than the HBase user. + + + + hbase.dfs.client.read.shortcircuit.buffer.size + 131072 + If the DFSClient configuration + dfs.client.read.shortcircuit.buffer.size is unset, we will + use what is configured here as the short circuit read default + direct byte buffer size. DFSClient native default is 1MB; HBase + keeps its HDFS files open so number of file blocks * 1MB soon + starts to add up and threaten OOME because of a shortage of + direct memory. So, we set it down from the default. Make + it > the default hbase block size set in the HColumnDescriptor + which is usually 64k. + + + + hbase.regionserver.checksum.verify + true + + If set to true (the default), HBase verifies the checksums for hfile + blocks. HBase writes checksums inline with the data when it writes out + hfiles. HDFS (as of this writing) writes checksums to a separate file + than the data file necessitating extra seeks. Setting this flag saves + some on i/o. Checksum verification by HDFS will be internally disabled + on hfile streams when this flag is set. If the hbase-checksum verification + fails, we will switch back to using HDFS checksums (so do not disable HDFS + checksums! And besides this feature applies to hfiles only, not to WALs). + If this parameter is set to false, then hbase will not verify any checksums, + instead it will depend on checksum verification being done in the HDFS client. + + + + hbase.hstore.bytes.per.checksum + 16384 + + Number of bytes in a newly created checksum chunk for HBase-level + checksums in hfile blocks. + + + + hbase.hstore.checksum.algorithm + CRC32C + + Name of an algorithm that is used to compute checksums. Possible values + are NULL, CRC32, CRC32C. + + + + hbase.client.scanner.max.result.size + 2097152 + Maximum number of bytes returned when calling a scanner's next method. 
+ Note that when a single row is larger than this limit the row is still returned completely. + The default value is 2MB, which is good for 1ge networks. + With faster and/or high latency networks this value should be increased. + + + + hbase.server.scanner.max.result.size + 104857600 + Maximum number of bytes returned when calling a scanner's next method. + Note that when a single row is larger than this limit the row is still returned completely. + The default value is 100MB. + This is a safety setting to protect the server from OOM situations. + + + + hbase.status.published + false + + This setting activates the publication by the master of the status of the region server. + When a region server dies and its recovery starts, the master will push this information + to the client application, to let them cut the connection immediately instead of waiting + for a timeout. + + + + hbase.status.publisher.class + org.apache.hadoop.hbase.master.ClusterStatusPublisher$MulticastPublisher + + Implementation of the status publication with a multicast message. + + + + hbase.status.listener.class + org.apache.hadoop.hbase.client.ClusterStatusListener$MulticastListener + + Implementation of the status listener with a multicast message. + + + + hbase.status.multicast.address.ip + 226.1.1.3 + + Multicast address to use for the status publication by multicast. + + + + hbase.status.multicast.address.port + 16100 + + Multicast port to use for the status publication by multicast. + + + + hbase.dynamic.jars.dir + ${hbase.rootdir}/lib + + The directory from which the custom filter JARs can be loaded + dynamically by the region server without the need to restart. However, + an already loaded filter/co-processor class would not be un-loaded. See + HBASE-1936 for more details. + + Does not apply to coprocessors. + + + + hbase.security.authentication + simple + + Controls whether or not secure authentication is enabled for HBase. 
+ Possible values are 'simple' (no authentication), and 'kerberos'. + + + + hbase.rest.filter.classes + org.apache.hadoop.hbase.rest.filter.GzipFilter + + Servlet filters for REST service. + + + + hbase.master.loadbalancer.class + org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer + + Class used to execute the regions balancing when the period occurs. + See the class comment for more on how it works + http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.html + It replaces the DefaultLoadBalancer as the default (since renamed + as the SimpleLoadBalancer). + + + + hbase.master.loadbalance.bytable + false + Factor Table name when the balancer runs. + Default: false. + + + + hbase.master.normalizer.class + org.apache.hadoop.hbase.master.normalizer.SimpleRegionNormalizer + + Class used to execute the region normalization when the period occurs. + See the class comment for more on how it works + http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.html + + + + hbase.rest.csrf.enabled + false + + Set to true to enable protection against cross-site request forgery (CSRF) + + + + hbase.rest-csrf.browser-useragents-regex + ^Mozilla.*,^Opera.* + + A comma-separated list of regular expressions used to match against an HTTP + request's User-Agent header when protection against cross-site request + forgery (CSRF) is enabled for REST server by setting + hbase.rest.csrf.enabled to true. If the incoming User-Agent matches + any of these regular expressions, then the request is considered to be sent + by a browser, and therefore CSRF prevention is enforced. If the request's + User-Agent does not match any of these regular expressions, then the request + is considered to be sent by something other than a browser, such as scripted + automation. In this case, CSRF is not a potential attack vector, so + the prevention is not enforced. 
This helps achieve backwards-compatibility + with existing automation that has not been updated to send the CSRF + prevention header. + + + + hbase.security.exec.permission.checks + false + + If this setting is enabled and ACL based access control is active (the + AccessController coprocessor is installed either as a system coprocessor + or on a table as a table coprocessor) then you must grant all relevant + users EXEC privilege if they require the ability to execute coprocessor + endpoint calls. EXEC privilege, like any other permission, can be + granted globally to a user, or to a user on a per table or per namespace + basis. For more information on coprocessor endpoints, see the coprocessor + section of the HBase online manual. For more information on granting or + revoking permissions using the AccessController, see the security + section of the HBase online manual. + + + + hbase.procedure.regionserver.classes + + A comma-separated list of + org.apache.hadoop.hbase.procedure.RegionServerProcedureManager procedure managers that are + loaded by default on the active HRegionServer process. The lifecycle methods (init/start/stop) + will be called by the active HRegionServer process to perform the specific globally barriered + procedure. After implementing your own RegionServerProcedureManager, just put it in + HBase's classpath and add the fully qualified class name here. + + + + hbase.procedure.master.classes + + A comma-separated list of + org.apache.hadoop.hbase.procedure.MasterProcedureManager procedure managers that are + loaded by default on the active HMaster process. A procedure is identified by its signature and + users can use the signature and an instant name to trigger an execution of a globally barriered + procedure. After implementing your own MasterProcedureManager, just put it in HBase's classpath + and add the fully qualified class name here. 
+ + + + hbase.coordinated.state.manager.class + org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager + Fully qualified name of class implementing coordinated state manager. + + + hbase.regionserver.storefile.refresh.period + 0 + + The period (in milliseconds) for refreshing the store files for the secondary regions. 0 + means this feature is disabled. Secondary regions sees new files (from flushes and + compactions) from primary once the secondary region refreshes the list of files in the + region (there is no notification mechanism). But too frequent refreshes might cause + extra Namenode pressure. If the files cannot be refreshed for longer than HFile TTL + (hbase.master.hfilecleaner.ttl) the requests are rejected. Configuring HFile TTL to a larger + value is also recommended with this setting. + + + + hbase.region.replica.replication.enabled + false + + Whether asynchronous WAL replication to the secondary region replicas is enabled or not. + If this is enabled, a replication peer named "region_replica_replication" will be created + which will tail the logs and replicate the mutations to region replicas for tables that + have region replication > 1. If this is enabled once, disabling this replication also + requires disabling the replication peer using shell or Admin java class. + Replication to secondary region replicas works over standard inter-cluster replication. + + + + hbase.http.filter.initializers + org.apache.hadoop.hbase.http.lib.StaticUserWebFilter + + A comma separated list of class names. Each class in the list must extend + org.apache.hadoop.hbase.http.FilterInitializer. The corresponding Filter will + be initialized. Then, the Filter will be applied to all user facing jsp + and servlet web pages. + The ordering of the list defines the ordering of the filters. + The default StaticUserWebFilter add a user principal as defined by the + hbase.http.staticuser.user property. 
+ + + + hbase.security.visibility.mutations.checkauths + false + + This property if enabled, will check whether the labels in the visibility + expression are associated with the user issuing the mutation + + + + hbase.http.max.threads + 16 + + The maximum number of threads that the HTTP Server will create in its + ThreadPool. + + + + hbase.replication.rpc.codec + org.apache.hadoop.hbase.codec.KeyValueCodecWithTags + + The codec that is to be used when replication is enabled so that + the tags are also replicated. This is used along with HFileV3 which + supports tags in them. If tags are not used or if the hfile version used + is HFileV2 then KeyValueCodec can be used as the replication codec. Note that + using KeyValueCodecWithTags for replication when there are no tags causes no harm. + + + + hbase.replication.source.maxthreads + 10 + + The maximum number of threads any replication source will use for + shipping edits to the sinks in parallel. This also limits the number of + chunks each replication batch is broken into. Larger values can improve + the replication throughput between the master and slave clusters. The + default of 10 will rarely need to be changed. + + + + + hbase.http.staticuser.user + dr.stack + + The user name to filter as, on static web filters + while rendering content. An example use is the HDFS + web UI (user to be used for browsing files). + + + + hbase.regionserver.handler.abort.on.error.percent + 0.5 + The percent of region server RPC threads failed to abort RS. + -1 Disable aborting; 0 Abort if even a single handler has died; + 0.x Abort only when this percent of handlers have died; + 1 Abort only all of the handers have died. + + + + + hbase.mob.file.cache.size + 1000 + + Number of opened file handlers to cache. + A larger value will benefit reads by providing more file handlers per mob + file cache and would reduce frequent file opening and closing. 
+ However, if this is set too high, this could lead to a "too many opened file handlers" + The default value is 1000. + + + + hbase.mob.cache.evict.period + 3600 + + The amount of time in seconds before the mob cache evicts cached mob files. + The default value is 3600 seconds. + + + + hbase.mob.cache.evict.remain.ratio + 0.5f + + The ratio (between 0.0 and 1.0) of files that remains cached after an eviction + is triggered when the number of cached mob files exceeds the hbase.mob.file.cache.size. + The default value is 0.5f. + + + + hbase.master.mob.ttl.cleaner.period + 86400 + + The period that ExpiredMobFileCleanerChore runs. The unit is second. + The default value is one day. The MOB file name uses only the date part of + the file creation time in it. We use this time for deciding TTL expiry of + the files. So the removal of TTL expired files might be delayed. The max + delay might be 24 hrs. + + + + hbase.mob.compaction.mergeable.threshold + 1342177280 + + If the size of a mob file is less than this value, it's regarded as a small + file and needs to be merged in mob compaction. The default value is 1280MB. + + + + hbase.mob.delfile.max.count + 3 + + The max number of del files that is allowed in the mob compaction. + In the mob compaction, when the number of existing del files is larger than + this value, they are merged until number of del files is not larger this value. + The default value is 3. + + + + hbase.mob.compaction.batch.size + 100 + + The max number of the mob files that is allowed in a batch of the mob compaction. + The mob compaction merges the small mob files to bigger ones. If the number of the + small files is very large, it could lead to a "too many opened file handlers" in the merge. + And the merge has to be split into batches. This value limits the number of mob files + that are selected in a batch of the mob compaction. The default value is 100. 
+ + + + hbase.mob.compaction.chore.period + 604800 + + The period that MobCompactionChore runs. The unit is second. + The default value is one week. + + + + hbase.mob.compactor.class + org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactor + + Implementation of mob compactor, the default one is PartitionedMobCompactor. + + + + hbase.mob.compaction.threads.max + 1 + + The max number of threads used in MobCompactor. + + + + hbase.snapshot.master.timeout.millis + 300000 + + Timeout for master for the snapshot procedure execution. + + + + hbase.snapshot.region.timeout + 300000 + + Timeout for regionservers to keep threads in snapshot request pool waiting. + + + + hbase.rpc.rows.warning.threshold + 5000 + + Number of rows in a batch operation above which a warning will be logged. + + + + hbase.master.wait.on.service.seconds + 30 + Default is 5 minutes. Make it 30 seconds for tests. See + HBASE-19794 for some context. + + + + hbase.master.cleaner.snapshot.interval + 1800000 + + Snapshot Cleanup chore interval in milliseconds. + The cleanup thread keeps running at this interval + to find all snapshots that are expired based on TTL + and delete them. + + + + hbase.master.snapshot.ttl + 0 + + Default Snapshot TTL to be considered when the user does not specify TTL while + creating snapshot. Default value 0 indicates FOREVERE - snapshot should not be + automatically deleted until it is manually deleted + + + + hbase.master.regions.recovery.check.interval + 1200000 + + Regions Recovery Chore interval in milliseconds. + This chore keeps running at this interval to + find all regions with configurable max store file ref count + and reopens them. + + + + hbase.regions.recovery.store.file.ref.count + -1 + + Very large number of ref count on a compacted + store file indicates that it is a ref leak + on that object(compacted store file). + Such files can not be removed after + it is invalidated via compaction. 
+ Only way to recover in such scenario is to + reopen the region which can release + all resources, like the refcount, + leases, etc. This config represents Store files Ref + Count threshold value considered for reopening + regions. Any region with compacted store files + ref count > this value would be eligible for + reopening by master. Here, we get the max + refCount among all refCounts on all + compacted away store files that belong to a + particular region. Default value -1 indicates + this feature is turned off. Only positive + integer value should be provided to + enable this feature. + + + + hbase.regionserver.slowlog.ringbuffer.size + 256 + + Default size of ringbuffer to be maintained by each RegionServer in order + to store online slowlog responses. This is an in-memory ring buffer of + requests that were judged to be too slow in addition to the responseTooSlow + logging. The in-memory representation would be complete. + For more details, please look into Doc Section: + Get Slow Response Log from shell + + + + hbase.regionserver.slowlog.buffer.enabled + false + + Indicates whether RegionServers have ring buffer running for storing + Online Slow logs in FIFO manner with limited entries. The size of + the ring buffer is indicated by config: hbase.regionserver.slowlog.ringbuffer.size + The default value is false, turn this on and get latest slowlog + responses with complete data. + + + + hbase.regionserver.slowlog.systable.enabled + false + + Should be enabled only if hbase.regionserver.slowlog.buffer.enabled is enabled. If enabled + (true), all slow/large RPC logs would be persisted to system table hbase:slowlog (in addition + to in-memory ring buffer at each RegionServer). The records are stored in increasing + order of time. Operators can scan the table with various combination of ColumnValueFilter. 
+ More details are provided in the doc section: + "Get Slow/Large Response Logs from System table hbase:slowlog" + + + + hbase.rpc.rows.size.threshold.reject + false + + If value is true, RegionServer will abort batch requests of Put/Delete with number of rows + in a batch operation exceeding threshold defined by value of config: + hbase.rpc.rows.warning.threshold. The default value is false and hence, by default, only + warning will be logged. This config should be turned on to prevent RegionServer from serving + very large batch size of rows and this way we can improve CPU usages by discarding + too large batch request. + + + + hbase.namedqueue.provider.classes + + org.apache.hadoop.hbase.namequeues.impl.SlowLogQueueService,org.apache.hadoop.hbase.namequeues.impl.BalancerDecisionQueueService,org.apache.hadoop.hbase.namequeues.impl.BalancerRejectionQueueService + + + Default values for NamedQueueService implementors. This comma separated full class names + represent all implementors of NamedQueueService that we would like to be invoked by + LogEvent handler service. One example of NamedQueue service is SlowLogQueueService which + is used to store slow/large RPC logs in ringbuffer at each RegionServer. + All implementors of NamedQueueService should be found under package: + "org.apache.hadoop.hbase.namequeues.impl" + + + + hbase.master.balancer.decision.buffer.enabled + false + + Indicates whether active HMaster has ring buffer running for storing + balancer decisions in FIFO manner with limited entries. The size of + the ring buffer is indicated by config: hbase.master.balancer.decision.queue.size + + + + hbase.master.balancer.rejection.buffer.enabled + false + + Indicates whether active HMaster has ring buffer running for storing + balancer rejection in FIFO manner with limited entries. 
The size of + the ring buffer is indicated by config: hbase.master.balancer.rejection.queue.size + + + diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index e64964ed94e9c..8c57dc84dead4 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -19,6 +19,7 @@ package org.apache.hudi.avro; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.testutils.SchemaTestUtil; import org.apache.hudi.exception.SchemaCompatibilityException; import org.apache.avro.JsonProperties; @@ -27,12 +28,14 @@ import org.apache.avro.generic.GenericRecord; import org.junit.jupiter.api.Test; +import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.Map; +import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldSchemaFromWriteSchema; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; @@ -88,6 +91,12 @@ public class TestHoodieAvroUtils { + "{\"name\":\"decimal_col\",\"type\":[\"null\"," + "{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":8,\"scale\":4}],\"default\":null}]}"; + private static String SCHEMA_WITH_NESTED_FIELD = "{\"name\":\"MyClass\",\"type\":\"record\",\"namespace\":\"com.acme.avro\",\"fields\":[" + + "{\"name\":\"firstname\",\"type\":\"string\"}," + + "{\"name\":\"lastname\",\"type\":\"string\"}," + + "{\"name\":\"student\",\"type\":{\"name\":\"student\",\"type\":\"record\",\"fields\":[" + + "{\"name\":\"firstname\",\"type\":[\"null\" ,\"string\"],\"default\": null},{\"name\":\"lastname\",\"type\":[\"null\" ,\"string\"],\"default\": null}]}}]}"; + @Test public void testPropsPresent() { Schema 
schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(EXAMPLE_SCHEMA)); @@ -248,7 +257,7 @@ public void testGetNestedFieldVal() { } @Test - public void testGetNestedFieldValWithDecimalFiled() { + public void testGetNestedFieldValWithDecimalField() { GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(SCHEMA_WITH_DECIMAL_FIELD)); rec.put("key_col", "key"); BigDecimal bigDecimal = new BigDecimal("1234.5678"); @@ -264,4 +273,36 @@ public void testGetNestedFieldValWithDecimalFiled() { assertEquals(0, buffer.position()); } + @Test + public void testGetNestedFieldSchema() throws IOException { + Schema schema = SchemaTestUtil.getEvolvedSchema(); + GenericRecord rec = new GenericData.Record(schema); + rec.put("field1", "key1"); + rec.put("field2", "val1"); + rec.put("name", "val2"); + rec.put("favorite_number", 2); + // test simple field schema + assertEquals(Schema.create(Schema.Type.STRING), getNestedFieldSchemaFromWriteSchema(rec.getSchema(), "field1")); + + GenericRecord rec2 = new GenericData.Record(schema); + rec2.put("field1", "key1"); + rec2.put("field2", "val1"); + rec2.put("name", "val2"); + rec2.put("favorite_number", 12); + // test comparison of non-string type + assertEquals(-1, GenericData.get().compare(rec.get("favorite_number"), rec2.get("favorite_number"), getNestedFieldSchemaFromWriteSchema(rec.getSchema(), "favorite_number"))); + + // test nested field schema + Schema nestedSchema = new Schema.Parser().parse(SCHEMA_WITH_NESTED_FIELD); + GenericRecord rec3 = new GenericData.Record(nestedSchema); + rec3.put("firstname", "person1"); + rec3.put("lastname", "person2"); + GenericRecord studentRecord = new GenericData.Record(rec3.getSchema().getField("student").schema()); + studentRecord.put("firstname", "person1"); + studentRecord.put("lastname", "person2"); + rec3.put("student", studentRecord); + + assertEquals(Schema.create(Schema.Type.STRING), getNestedFieldSchemaFromWriteSchema(rec3.getSchema(), "student.firstname")); + 
assertEquals(Schema.create(Schema.Type.STRING), getNestedFieldSchemaFromWriteSchema(nestedSchema, "student.firstname")); + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java index cc59b46024792..190ad398e1b60 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java @@ -19,12 +19,13 @@ package org.apache.hudi.common.fs.inline; import org.apache.hudi.common.testutils.FileSystemTestUtils; -import org.apache.hudi.io.storage.HoodieHBaseKVComparator; +import org.apache.hudi.io.storage.HoodieHFileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.hfile.CacheConfig; @@ -39,10 +40,12 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.UUID; +import static org.apache.hadoop.hbase.CellComparatorImpl.COMPARATOR; import static org.apache.hudi.common.testutils.FileSystemTestUtils.FILE_SCHEME; import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; import static org.apache.hudi.common.testutils.FileSystemTestUtils.getPhantomFile; @@ -56,11 +59,12 @@ */ public class TestInLineFileSystemHFileInLining { + private static final String LOCAL_FORMATTER = "%010d"; + private static final String VALUE_PREFIX = "value"; + private static final int MIN_BLOCK_BYTES = 1024; private final Configuration inMemoryConf; private final Configuration inlineConf; - 
private final int minBlockSize = 1024; - private static final String LOCAL_FORMATTER = "%010d"; - private int maxRows = 100 + RANDOM.nextInt(1000); + private final int maxRows = 100 + RANDOM.nextInt(1000); private Path generatedPath; public TestInLineFileSystemHFileInLining() { @@ -88,12 +92,11 @@ public void testSimpleInlineFileSystem() throws IOException { CacheConfig cacheConf = new CacheConfig(inMemoryConf); FSDataOutputStream fout = createFSOutput(outerInMemFSPath, inMemoryConf); HFileContext meta = new HFileContextBuilder() - .withBlockSize(minBlockSize) + .withBlockSize(MIN_BLOCK_BYTES).withCellComparator(COMPARATOR) .build(); HFile.Writer writer = HFile.getWriterFactory(inMemoryConf, cacheConf) .withOutputStream(fout) .withFileContext(meta) - .withComparator(new HoodieHBaseKVComparator()) .create(); writeRecords(writer); @@ -110,9 +113,8 @@ public void testSimpleInlineFileSystem() throws IOException { InLineFileSystem inlineFileSystem = (InLineFileSystem) inlinePath.getFileSystem(inlineConf); FSDataInputStream fin = inlineFileSystem.open(inlinePath); - HFile.Reader reader = HFile.createReader(inlineFileSystem, inlinePath, cacheConf, inlineConf); - // Load up the index. - reader.loadFileInfo(); + HFile.Reader reader = + HoodieHFileUtils.createHFileReader(inlineFileSystem, inlinePath, cacheConf, inlineConf); // Get a scanner that caches and that does not use pread. HFileScanner scanner = reader.getScanner(true, false); // Align scanner at start of the file. 
@@ -121,21 +123,24 @@ public void testSimpleInlineFileSystem() throws IOException { Set rowIdsToSearch = getRandomValidRowIds(10); for (int rowId : rowIdsToSearch) { - assertEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))), + KeyValue keyValue = new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId)); + assertEquals(0, scanner.seekTo(keyValue), "location lookup failed"); // read the key and see if it matches - ByteBuffer readKey = scanner.getKey(); - assertArrayEquals(getSomeKey(rowId), Bytes.toBytes(readKey), "seeked key does not match"); - scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))); + Cell cell = scanner.getCell(); + byte[] key = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); + byte[] expectedKey = Arrays.copyOfRange(keyValue.getRowArray(), keyValue.getRowOffset(), keyValue.getRowOffset() + keyValue.getRowLength()); + assertArrayEquals(expectedKey, key, "seeked key does not match"); + scanner.seekTo(keyValue); ByteBuffer val1 = scanner.getValue(); - scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))); + scanner.seekTo(keyValue); ByteBuffer val2 = scanner.getValue(); assertArrayEquals(Bytes.toBytes(val1), Bytes.toBytes(val2)); } int[] invalidRowIds = {-4, maxRows, maxRows + 1, maxRows + 120, maxRows + 160, maxRows + 1000}; for (int rowId : invalidRowIds) { - assertNotEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))), + assertNotEquals(0, scanner.seekTo(new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId))), "location lookup should have failed"); } reader.close(); @@ -155,7 +160,7 @@ private Set getRandomValidRowIds(int count) { } private byte[] getSomeKey(int rowId) { - KeyValue kv = new KeyValue(String.format(LOCAL_FORMATTER, Integer.valueOf(rowId)).getBytes(), + KeyValue kv = new KeyValue(String.format(LOCAL_FORMATTER, rowId).getBytes(), Bytes.toBytes("family"), Bytes.toBytes("qual"), HConstants.LATEST_TIMESTAMP, 
KeyValue.Type.Put); return kv.getKey(); } @@ -169,17 +174,15 @@ private void writeRecords(HFile.Writer writer) throws IOException { writer.close(); } - private int writeSomeRecords(HFile.Writer writer) + private void writeSomeRecords(HFile.Writer writer) throws IOException { - String value = "value"; KeyValue kv; for (int i = 0; i < (maxRows); i++) { - String key = String.format(LOCAL_FORMATTER, Integer.valueOf(i)); + String key = String.format(LOCAL_FORMATTER, i); kv = new KeyValue(Bytes.toBytes(key), Bytes.toBytes("family"), Bytes.toBytes("qual"), - Bytes.toBytes(value + key)); + Bytes.toBytes(VALUE_PREFIX + key)); writer.append(kv); } - return (maxRows); } private void readAllRecords(HFileScanner scanner) throws IOException { @@ -187,30 +190,31 @@ private void readAllRecords(HFileScanner scanner) throws IOException { } // read the records and check - private int readAndCheckbytes(HFileScanner scanner, int start, int n) + private void readAndCheckbytes(HFileScanner scanner, int start, int n) throws IOException { - String value = "value"; int i = start; for (; i < (start + n); i++) { - ByteBuffer key = scanner.getKey(); - ByteBuffer val = scanner.getValue(); - String keyStr = String.format(LOCAL_FORMATTER, Integer.valueOf(i)); - String valStr = value + keyStr; + Cell cell = scanner.getCell(); + byte[] key = Arrays.copyOfRange( + cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); + byte[] val = Arrays.copyOfRange( + cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength()); + String keyStr = String.format(LOCAL_FORMATTER, i); + String valStr = VALUE_PREFIX + keyStr; KeyValue kv = new KeyValue(Bytes.toBytes(keyStr), Bytes.toBytes("family"), Bytes.toBytes("qual"), Bytes.toBytes(valStr)); - byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(Bytes.toBytes(key), 0, - Bytes.toBytes(key).length).getKey(); - assertArrayEquals(kv.getKey(), keyBytes, - "bytes for keys do not match " + keyStr + " " + 
Bytes.toString(Bytes.toBytes(key))); - byte[] valBytes = Bytes.toBytes(val); - assertArrayEquals(Bytes.toBytes(valStr), valBytes, - "bytes for vals do not match " + valStr + " " + Bytes.toString(valBytes)); + byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(key, 0, key.length).getKey(); + byte[] expectedKeyBytes = Arrays.copyOfRange( + kv.getRowArray(), kv.getRowOffset(), kv.getRowOffset() + kv.getRowLength()); + assertArrayEquals(expectedKeyBytes, keyBytes, + "bytes for keys do not match " + keyStr + " " + Bytes.toString(key)); + assertArrayEquals(Bytes.toBytes(valStr), val, + "bytes for vals do not match " + valStr + " " + Bytes.toString(val)); if (!scanner.next()) { break; } } assertEquals(i, start + n - 1); - return (start + n); } private long generateOuterFile(Path outerPath, byte[] inlineBytes) throws IOException { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index e9b06e6d6397d..536fec609542f 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -20,9 +20,9 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.DeleteRecord; import org.apache.hudi.common.model.HoodieArchivedLogFile; import org.apache.hudi.common.model.HoodieAvroRecord; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -89,6 +89,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema; import static 
org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -574,12 +575,13 @@ public void testBasicAppendAndScanMultipleFiles(ExternalSpillableMap.DiskMapType writer.close(); FileCreateUtils.createDeltaCommit(basePath, "100", fs); // scan all log blocks (across multiple log files) + List logFilePaths = logFiles.stream() + .map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()); + assertTrue(logFilePaths.size() > 0); HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder() .withFileSystem(fs) .withBasePath(basePath) - .withLogFilePaths( - logFiles.stream() - .map(logFile -> logFile.getPath().toString()).collect(Collectors.toList())) + .withLogFilePaths(logFilePaths) .withReaderSchema(schema) .withLatestInstantTime("100") .withMaxMemorySizeInBytes(10240L) @@ -589,6 +591,7 @@ public void testBasicAppendAndScanMultipleFiles(ExternalSpillableMap.DiskMapType .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(logFilePaths.get(0)).getParent())) .build(); List scannedRecords = new ArrayList<>(); @@ -803,6 +806,7 @@ public void testAvroLogRecordReaderBasic(ExternalSpillableMap.DiskMapType diskMa .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(200, scanner.getTotalLogRecords()); Set readKeys = new HashSet<>(200); @@ -881,6 +885,7 @@ public void testAvroLogRecordReaderWithRollbackTombstone(ExternalSpillableMap.Di .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new 
Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(200, scanner.getTotalLogRecords(), "We read 200 records from 2 write batches"); Set readKeys = new HashSet<>(200); @@ -968,6 +973,7 @@ public void testAvroLogRecordReaderWithFailedPartialBlock(ExternalSpillableMap.D .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(200, scanner.getTotalLogRecords(), "We would read 200 records"); Set readKeys = new HashSet<>(200); @@ -1016,13 +1022,13 @@ public void testAvroLogRecordReaderWithDeleteAndRollback(ExternalSpillableMap.Di .collect(Collectors.toList()); // Delete 50 keys - List deletedKeys = copyOfRecords1.stream() - .map(s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), + List deletedRecords = copyOfRecords1.stream() + .map(s -> (DeleteRecord.create(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), ((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString()))) .collect(Collectors.toList()).subList(0, 50); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "102"); - HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header); + HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedRecords.toArray(new DeleteRecord[50]), header); writer.appendBlock(deleteBlock); List allLogFiles = @@ -1046,6 +1052,7 @@ public void testAvroLogRecordReaderWithDeleteAndRollback(ExternalSpillableMap.Di .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(200, scanner.getTotalLogRecords(), "We still would read 200 records"); 
@@ -1063,7 +1070,7 @@ public void testAvroLogRecordReaderWithDeleteAndRollback(ExternalSpillableMap.Di }); assertEquals(200, readKeys.size(), "Stream collect should return all 200 records"); assertEquals(50, emptyPayloads.size(), "Stream collect should return all 50 records with empty payloads"); - originalKeys.removeAll(deletedKeys); + originalKeys.removeAll(deletedRecords); Collections.sort(originalKeys); Collections.sort(readKeys); assertEquals(originalKeys, readKeys, "CompositeAvroLogReader should return 150 records from 2 versions"); @@ -1092,11 +1099,130 @@ public void testAvroLogRecordReaderWithDeleteAndRollback(ExternalSpillableMap.Di .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey())); assertEquals(200, readKeys.size(), "Stream collect should return all 200 records after rollback of delete"); } + @ParameterizedTest + @MethodSource("testArguments") + public void testAvroLogRecordReaderWithDisorderDelete(ExternalSpillableMap.DiskMapType diskMapType, + boolean isCompressionEnabled, + boolean readBlocksLazily) + throws IOException, URISyntaxException, InterruptedException { + Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema()); + // Set a small threshold so that every block is a new version + Writer writer = + HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) + .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); + + // Write 1 + List records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100); + List copyOfRecords1 = records1.stream() + .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); + Map header = new HashMap<>(); + 
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); + header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); + writer.appendBlock(dataBlock); + + // Write 2 + header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101"); + List records2 = SchemaTestUtil.generateHoodieTestRecords(0, 100); + List copyOfRecords2 = records2.stream() + .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records2, header); + writer.appendBlock(dataBlock); + + copyOfRecords1.addAll(copyOfRecords2); + List originalKeys = + copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()) + .collect(Collectors.toList()); + + // Delete 10 keys + // Default orderingVal is 0, which means natural order, the DELETE records + // should overwrite the data records. + List deleteRecords1 = copyOfRecords1.subList(0, 10).stream() + .map(s -> (DeleteRecord.create(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), + ((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString()))) + .collect(Collectors.toList()); + + header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "102"); + HoodieDeleteBlock deleteBlock1 = new HoodieDeleteBlock(deleteRecords1.toArray(new DeleteRecord[0]), header); + writer.appendBlock(deleteBlock1); + + // Delete another 10 keys with -1 as orderingVal. 
+ // The deletion should not work + + header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "103"); + HoodieDeleteBlock deleteBlock2 = new HoodieDeleteBlock(copyOfRecords1.subList(10, 20).stream() + .map(s -> (DeleteRecord.create(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), + ((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(), -1))).toArray(DeleteRecord[]::new), header); + writer.appendBlock(deleteBlock2); + + // Delete another 10 keys with +1 as orderingVal. + // The deletion should work because the keys has greater ordering value. + List deletedRecords3 = copyOfRecords1.subList(20, 30).stream() + .map(s -> (DeleteRecord.create(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), + ((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(), 1))) + .collect(Collectors.toList()); + + header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "104"); + HoodieDeleteBlock deleteBlock3 = new HoodieDeleteBlock(deletedRecords3.toArray(new DeleteRecord[0]), header); + writer.appendBlock(deleteBlock3); + + List allLogFiles = + FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100") + .map(s -> s.getPath().toString()).collect(Collectors.toList()); + + FileCreateUtils.createDeltaCommit(basePath, "100", fs); + FileCreateUtils.createDeltaCommit(basePath, "101", fs); + FileCreateUtils.createDeltaCommit(basePath, "102", fs); + FileCreateUtils.createDeltaCommit(basePath, "103", fs); + FileCreateUtils.createDeltaCommit(basePath, "104", fs); + + HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder() + .withFileSystem(fs) + .withBasePath(basePath) + .withLogFilePaths(allLogFiles) + .withReaderSchema(schema) + .withLatestInstantTime("104") + .withMaxMemorySizeInBytes(10240L) + .withReadBlocksLazily(readBlocksLazily) + .withReverseReader(false) + .withBufferSize(bufferSize) + 
.withSpillableMapBasePath(BASE_OUTPUT_PATH) + .withDiskMapType(diskMapType) + .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) + .build(); + + assertEquals(200, scanner.getTotalLogRecords(), "We still would read 200 records"); + final List readKeys = new ArrayList<>(200); + final List emptyPayloadKeys = new ArrayList<>(); + scanner.forEach(s -> readKeys.add(s.getRecordKey())); + scanner.forEach(s -> { + try { + if (!s.getData().getInsertValue(schema).isPresent()) { + emptyPayloadKeys.add(s.getRecordKey()); + } + } catch (IOException io) { + throw new UncheckedIOException(io); + } + }); + assertEquals(200, readKeys.size(), "Stream collect should return all 200 records"); + assertEquals(20, emptyPayloadKeys.size(), "Stream collect should return all 20 records with empty payloads"); + + originalKeys.removeAll(deleteRecords1.stream().map(DeleteRecord::getRecordKey).collect(Collectors.toSet())); + originalKeys.removeAll(deletedRecords3.stream().map(DeleteRecord::getRecordKey).collect(Collectors.toSet())); + readKeys.removeAll(emptyPayloadKeys); + + Collections.sort(originalKeys); + Collections.sort(readKeys); + assertEquals(originalKeys, readKeys, "HoodieMergedLogRecordScanner should return 180 records from 4 versions"); + } + @ParameterizedTest @MethodSource("testArguments") public void testAvroLogRecordReaderWithFailedRollbacks(ExternalSpillableMap.DiskMapType diskMapType, @@ -1131,12 +1257,12 @@ public void testAvroLogRecordReaderWithFailedRollbacks(ExternalSpillableMap.Disk // Delete 50 keys // Delete 50 keys - List deletedKeys = copyOfRecords1.stream() - .map(s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), + List deleteRecords = copyOfRecords1.stream() + .map(s -> (DeleteRecord.create(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), ((GenericRecord) 
s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString()))) .collect(Collectors.toList()).subList(0, 50); - HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header); + HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deleteRecords.toArray(new DeleteRecord[50]), header); writer.appendBlock(deleteBlock); FileCreateUtils.createDeltaCommit(basePath, "100", fs); @@ -1173,6 +1299,7 @@ public void testAvroLogRecordReaderWithFailedRollbacks(ExternalSpillableMap.Disk .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(0, scanner.getTotalLogRecords(), "We would have scanned 0 records because of rollback"); @@ -1208,11 +1335,11 @@ public void testAvroLogRecordReaderWithInsertDeleteAndRollback(ExternalSpillable writer.appendBlock(dataBlock); // Delete 50 keys - List deletedKeys = copyOfRecords1.stream() - .map(s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), + List deleteRecords = copyOfRecords1.stream() + .map(s -> (DeleteRecord.create(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), ((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString()))) .collect(Collectors.toList()).subList(0, 50); - HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header); + HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deleteRecords.toArray(new DeleteRecord[50]), header); writer.appendBlock(deleteBlock); FileCreateUtils.createDeltaCommit(basePath, "100", fs); @@ -1241,6 +1368,7 @@ public void testAvroLogRecordReaderWithInsertDeleteAndRollback(ExternalSpillable .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + 
.withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records"); FileCreateUtils.deleteDeltaCommit(basePath, "100", fs); @@ -1292,6 +1420,7 @@ public void testAvroLogRecordReaderWithInvalidRollback(ExternalSpillableMap.Disk .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(100, scanner.getTotalLogRecords(), "We still would read 100 records"); final List readKeys = new ArrayList<>(100); @@ -1328,11 +1457,11 @@ public void testAvroLogRecordReaderWithInsertsDeleteAndRollback(ExternalSpillabl // Delete 50 keys // Delete 50 keys - List deletedKeys = copyOfRecords1.stream() - .map(s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), + List deleteRecords = copyOfRecords1.stream() + .map(s -> (DeleteRecord.create(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), ((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString()))) .collect(Collectors.toList()).subList(0, 50); - HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header); + HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deleteRecords.toArray(new DeleteRecord[50]), header); writer.appendBlock(deleteBlock); FileCreateUtils.createDeltaCommit(basePath, "100", fs); @@ -1362,6 +1491,7 @@ public void testAvroLogRecordReaderWithInsertsDeleteAndRollback(ExternalSpillabl .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(0, scanner.getTotalLogRecords(), 
"We would read 0 records"); } @@ -1468,6 +1598,7 @@ public void testAvroLogRecordReaderWithMixedInsertsCorruptsAndRollback(ExternalS .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records"); FileCreateUtils.deleteDeltaCommit(basePath, "100", fs); @@ -1542,6 +1673,7 @@ private void testAvroLogRecordReaderMergingMultipleLogFiles(int numRecordsInLog1 .withSpillableMapBasePath(BASE_OUTPUT_PATH) .withDiskMapType(diskMapType) .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled) + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(allLogFiles.get(0)).getParent())) .build(); assertEquals(Math.max(numRecordsInLog1, numRecordsInLog2), scanner.getNumMergedRecordsInLog(), @@ -1632,40 +1764,39 @@ public void testBasicAppendAndReadInReverse(boolean readBlocksLazily) FileCreateUtils.createDeltaCommit(basePath, "100", fs); - HoodieLogFileReader reader = new HoodieLogFileReader(fs, new HoodieLogFile(writer.getLogFile().getPath(), - fs.getFileStatus(writer.getLogFile().getPath()).getLen()), SchemaTestUtil.getSimpleSchema(), - bufferSize, readBlocksLazily, true); + HoodieLogFile logFile = new HoodieLogFile(writer.getLogFile().getPath(), fs.getFileStatus(writer.getLogFile().getPath()).getLen()); + try (HoodieLogFileReader reader = new HoodieLogFileReader(fs, logFile, SchemaTestUtil.getSimpleSchema(), bufferSize, readBlocksLazily, true)) { - assertTrue(reader.hasPrev(), "Last block should be available"); - HoodieLogBlock prevBlock = reader.prev(); - HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock; + assertTrue(reader.hasPrev(), "Last block should be available"); + HoodieLogBlock prevBlock = reader.prev(); + HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock; - List recordsRead1 = 
getRecords(dataBlockRead); - assertEquals(copyOfRecords3.size(), recordsRead1.size(), - "Third records size should be equal to the written records size"); - assertEquals(copyOfRecords3, recordsRead1, - "Both records lists should be the same. (ordering guaranteed)"); + List recordsRead1 = getRecords(dataBlockRead); + assertEquals(copyOfRecords3.size(), recordsRead1.size(), + "Third records size should be equal to the written records size"); + assertEquals(copyOfRecords3, recordsRead1, + "Both records lists should be the same. (ordering guaranteed)"); - assertTrue(reader.hasPrev(), "Second block should be available"); - prevBlock = reader.prev(); - dataBlockRead = (HoodieDataBlock) prevBlock; - List recordsRead2 = getRecords(dataBlockRead); - assertEquals(copyOfRecords2.size(), recordsRead2.size(), - "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords2, recordsRead2, - "Both records lists should be the same. (ordering guaranteed)"); + assertTrue(reader.hasPrev(), "Second block should be available"); + prevBlock = reader.prev(); + dataBlockRead = (HoodieDataBlock) prevBlock; + List recordsRead2 = getRecords(dataBlockRead); + assertEquals(copyOfRecords2.size(), recordsRead2.size(), + "Read records size should be equal to the written records size"); + assertEquals(copyOfRecords2, recordsRead2, + "Both records lists should be the same. (ordering guaranteed)"); - assertTrue(reader.hasPrev(), "First block should be available"); - prevBlock = reader.prev(); - dataBlockRead = (HoodieDataBlock) prevBlock; - List recordsRead3 = getRecords(dataBlockRead); - assertEquals(copyOfRecords1.size(), recordsRead3.size(), - "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords1, recordsRead3, - "Both records lists should be the same. 
(ordering guaranteed)"); + assertTrue(reader.hasPrev(), "First block should be available"); + prevBlock = reader.prev(); + dataBlockRead = (HoodieDataBlock) prevBlock; + List recordsRead3 = getRecords(dataBlockRead); + assertEquals(copyOfRecords1.size(), recordsRead3.size(), + "Read records size should be equal to the written records size"); + assertEquals(copyOfRecords1, recordsRead3, + "Both records lists should be the same. (ordering guaranteed)"); - assertFalse(reader.hasPrev()); - reader.close(); + assertFalse(reader.hasPrev()); + } } @ParameterizedTest @@ -1713,19 +1844,20 @@ public void testAppendAndReadOnCorruptedLogInReverse(boolean readBlocksLazily) writer.close(); // First round of reads - we should be able to read the first block and then EOF - HoodieLogFileReader reader = - new HoodieLogFileReader(fs, new HoodieLogFile(writer.getLogFile().getPath(), - fs.getFileStatus(writer.getLogFile().getPath()).getLen()), schema, bufferSize, readBlocksLazily, true); + HoodieLogFile logFile = new HoodieLogFile(writer.getLogFile().getPath(), fs.getFileStatus(writer.getLogFile().getPath()).getLen()); - assertTrue(reader.hasPrev(), "Last block should be available"); - HoodieLogBlock block = reader.prev(); - assertTrue(block instanceof HoodieDataBlock, "Last block should be datablock"); + try (HoodieLogFileReader reader = + new HoodieLogFileReader(fs, logFile, schema, bufferSize, readBlocksLazily, true)) { - assertTrue(reader.hasPrev(), "Last block should be available"); - assertThrows(CorruptedLogFileException.class, () -> { - reader.prev(); - }); - reader.close(); + assertTrue(reader.hasPrev(), "Last block should be available"); + HoodieLogBlock block = reader.prev(); + assertTrue(block instanceof HoodieDataBlock, "Last block should be datablock"); + + assertTrue(reader.hasPrev(), "Last block should be available"); + assertThrows(CorruptedLogFileException.class, () -> { + reader.prev(); + }); + } } @ParameterizedTest @@ -1765,28 +1897,28 @@ public void 
testBasicAppendAndTraverseInReverse(boolean readBlocksLazily) FileCreateUtils.createDeltaCommit(basePath, "100", fs); - HoodieLogFileReader reader = new HoodieLogFileReader(fs, new HoodieLogFile(writer.getLogFile().getPath(), - fs.getFileStatus(writer.getLogFile().getPath()).getLen()), SchemaTestUtil.getSimpleSchema(), - bufferSize, readBlocksLazily, true); + HoodieLogFile logFile = new HoodieLogFile(writer.getLogFile().getPath(), fs.getFileStatus(writer.getLogFile().getPath()).getLen()); + try (HoodieLogFileReader reader = + new HoodieLogFileReader(fs, logFile, SchemaTestUtil.getSimpleSchema(), bufferSize, readBlocksLazily, true)) { - assertTrue(reader.hasPrev(), "Third block should be available"); - reader.moveToPrev(); + assertTrue(reader.hasPrev(), "Third block should be available"); + reader.moveToPrev(); - assertTrue(reader.hasPrev(), "Second block should be available"); - reader.moveToPrev(); + assertTrue(reader.hasPrev(), "Second block should be available"); + reader.moveToPrev(); - // After moving twice, this last reader.prev() should read the First block written - assertTrue(reader.hasPrev(), "First block should be available"); - HoodieLogBlock prevBlock = reader.prev(); - HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock; - List recordsRead = getRecords(dataBlockRead); - assertEquals(copyOfRecords1.size(), recordsRead.size(), - "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords1, recordsRead, - "Both records lists should be the same. 
(ordering guaranteed)"); + // After moving twice, this last reader.prev() should read the First block written + assertTrue(reader.hasPrev(), "First block should be available"); + HoodieLogBlock prevBlock = reader.prev(); + HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock; + List recordsRead = getRecords(dataBlockRead); + assertEquals(copyOfRecords1.size(), recordsRead.size(), + "Read records size should be equal to the written records size"); + assertEquals(copyOfRecords1, recordsRead, + "Both records lists should be the same. (ordering guaranteed)"); - assertFalse(reader.hasPrev()); - reader.close(); + assertFalse(reader.hasPrev()); + } } @Test @@ -1886,11 +2018,16 @@ public void testDataBlockFormatAppendAndReadWithProjectedSchema( private HoodieDataBlock getDataBlock(HoodieLogBlockType dataBlockType, List records, Map header) { + return getDataBlock(dataBlockType, records, header, new Path("dummy_path")); + } + + private HoodieDataBlock getDataBlock(HoodieLogBlockType dataBlockType, List records, + Map header, Path pathForReader) { switch (dataBlockType) { case AVRO_DATA_BLOCK: return new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD); case HFILE_DATA_BLOCK: - return new HoodieHFileDataBlock(records, header, Compression.Algorithm.GZ); + return new HoodieHFileDataBlock(records, header, Compression.Algorithm.GZ, pathForReader); case PARQUET_DATA_BLOCK: return new HoodieParquetDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD, CompressionCodecName.GZIP); default: @@ -1916,7 +2053,7 @@ private static Stream testArguments() { * Utility to convert the given iterator to a List. 
*/ private static List getRecords(HoodieDataBlock dataBlock) { - ClosableIterator itr = dataBlock.getRecordItr(); + ClosableIterator itr = dataBlock.getRecordIterator(); List elements = new ArrayList<>(); itr.forEachRemaining(elements::add); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodiePartitionMetadata.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodiePartitionMetadata.java new file mode 100644 index 0000000000000..3ec15d4f65d12 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodiePartitionMetadata.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.common.model; + +import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieException; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.util.Arrays; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestHoodiePartitionMetadata extends HoodieCommonTestHarness { + + FileSystem fs; + + @BeforeEach + public void setupTest() throws IOException { + initMetaClient(); + fs = metaClient.getFs(); + } + + static Stream formatProviderFn() { + return Stream.of( + Arguments.arguments(Option.empty()), + Arguments.arguments(Option.of(HoodieFileFormat.PARQUET)), + Arguments.arguments(Option.of(HoodieFileFormat.ORC)) + ); + } + + @ParameterizedTest + @MethodSource("formatProviderFn") + public void testTextFormatMetaFile(Option format) throws IOException { + // given + final Path partitionPath = new Path(basePath, "a/b/" + + format.map(Enum::name).orElse("text")); + fs.mkdirs(partitionPath); + final String commitTime = "000000000001"; + HoodiePartitionMetadata writtenMetadata = new HoodiePartitionMetadata(metaClient.getFs(), commitTime, new Path(basePath), partitionPath, format); + writtenMetadata.trySave(0); + + // when + HoodiePartitionMetadata readMetadata = new HoodiePartitionMetadata(metaClient.getFs(), new Path(metaClient.getBasePath(), partitionPath)); + + // then + assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)); + 
assertEquals(Option.of(commitTime), readMetadata.readPartitionCreatedCommitTime()); + assertEquals(3, readMetadata.getPartitionDepth()); + } + + @Test + public void testErrorIfAbsent() throws IOException { + final Path partitionPath = new Path(basePath, "a/b/not-a-partition"); + fs.mkdirs(partitionPath); + HoodiePartitionMetadata readMetadata = new HoodiePartitionMetadata(metaClient.getFs(), new Path(metaClient.getBasePath(), partitionPath)); + assertThrows(HoodieException.class, readMetadata::readPartitionCreatedCommitTime); + } + + @Test + public void testFileNames() { + assertEquals(new Path("/a/b/c/.hoodie_partition_metadata"), HoodiePartitionMetadata.textFormatMetaFilePath(new Path("/a/b/c"))); + assertEquals(Arrays.asList(new Path("/a/b/c/.hoodie_partition_metadata.parquet"), + new Path("/a/b/c/.hoodie_partition_metadata.orc")), HoodiePartitionMetadata.baseFormatMetaFilePaths(new Path("/a/b/c"))); + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java index f21d8e6dc37e5..0defefe2ea4e4 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java @@ -64,7 +64,7 @@ public void setUp() throws Exception { public void testCreate() throws IOException { assertTrue(fs.exists(new Path(metaPath, HoodieTableConfig.HOODIE_PROPERTIES_FILE))); HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(5, config.getProps().size()); + assertEquals(6, config.getProps().size()); } @Test @@ -77,7 +77,7 @@ public void testUpdate() throws IOException { assertTrue(fs.exists(cfgPath)); assertFalse(fs.exists(backupCfgPath)); HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(6, config.getProps().size()); + assertEquals(7, config.getProps().size()); 
assertEquals("test-table2", config.getTableName()); assertEquals("new_field", config.getPreCombineField()); } @@ -90,7 +90,7 @@ public void testDelete() throws IOException { assertTrue(fs.exists(cfgPath)); assertFalse(fs.exists(backupCfgPath)); HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(4, config.getProps().size()); + assertEquals(5, config.getProps().size()); assertNull(config.getProps().getProperty("hoodie.invalid.config")); assertFalse(config.getProps().contains(HoodieTableConfig.ARCHIVELOG_FOLDER.key())); } @@ -114,7 +114,7 @@ public void testReadsWithUpdateFailures() throws IOException { assertFalse(fs.exists(cfgPath)); assertTrue(fs.exists(backupCfgPath)); config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(5, config.getProps().size()); + assertEquals(6, config.getProps().size()); } @ParameterizedTest @@ -132,6 +132,6 @@ public void testUpdateRecovery(boolean shouldPropsFileExist) throws IOException assertTrue(fs.exists(cfgPath)); assertFalse(fs.exists(backupCfgPath)); config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(5, config.getProps().size()); + assertEquals(6, config.getProps().size()); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java new file mode 100644 index 0000000000000..59a24a79f013f --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.table; + +import org.apache.avro.Schema; + +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; + +import org.apache.hudi.exception.HoodieIncompatibleSchemaException; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestTableSchemaResolver { + + @Test + public void testRecreateSchemaWhenDropPartitionColumns() { + Schema originSchema = new Schema.Parser().parse(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA); + + // case1 + Option emptyPartitionFieldsOpt = Option.empty(); + Schema s1 = TableSchemaResolver.recreateSchemaWhenDropPartitionColumns(emptyPartitionFieldsOpt, originSchema); + assertEquals(originSchema, s1); + + // case2 + String[] pts1 = new String[0]; + Schema s2 = TableSchemaResolver.recreateSchemaWhenDropPartitionColumns(Option.of(pts1), originSchema); + assertEquals(originSchema, s2); + + // case3: partition_path is in originSchema + String[] pts2 = {"partition_path"}; + Schema s3 = TableSchemaResolver.recreateSchemaWhenDropPartitionColumns(Option.of(pts2), originSchema); + assertEquals(originSchema, s3); + + // case4: user_partition is not in originSchema + String[] pts3 = {"user_partition"}; + Schema s4 = TableSchemaResolver.recreateSchemaWhenDropPartitionColumns(Option.of(pts3), originSchema); + assertNotEquals(originSchema, s4); + 
assertTrue(s4.getFields().stream().anyMatch(f -> f.name().equals("user_partition"))); + Schema.Field f = s4.getField("user_partition"); + assertEquals(f.schema().getType().getName(), "string"); + + // case5: user_partition is in originSchema, but partition_path is in originSchema + String[] pts4 = {"user_partition", "partition_path"}; + try { + TableSchemaResolver.recreateSchemaWhenDropPartitionColumns(Option.of(pts3), originSchema); + } catch (HoodieIncompatibleSchemaException e) { + assertTrue(e.getMessage().contains("Partial partition fields are still in the schema")); + } + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java b/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java index 576cfd7cb0f3f..9ff17cdbd2688 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java @@ -199,6 +199,46 @@ public void testTimelineOperations() { assertTrue(activeCommitTimeline.isBeforeTimelineStarts("00")); } + @Test + public void testGetContiguousCompletedWriteTimeline() { + // a mock timeline with holes + timeline = new MockHoodieTimeline(Stream.of("01", "03", "05", "07", "13", "15", "17"), + Stream.of("09", "11", "19")); + assertTrue(timeline.getContiguousCompletedWriteTimeline().lastInstant().isPresent()); + assertEquals("07", timeline.getContiguousCompletedWriteTimeline().lastInstant().get().getTimestamp()); + + // add some instants where two are inflight and one of them (instant8 below) is not part of write timeline + HoodieInstant instant1 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "1"); + HoodieInstant instant2 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "2"); + HoodieInstant instant3 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "3"); + HoodieInstant 
instant4 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "4"); + HoodieInstant instant5 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "5"); + HoodieInstant instant6 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "6"); + HoodieInstant instant7 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "7"); + HoodieInstant instant8 = new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, "8"); + + timeline = new HoodieActiveTimeline(metaClient); + timeline.createNewInstant(instant1); + timeline.createNewInstant(instant2); + timeline.createNewInstant(instant3); + timeline.createNewInstant(instant4); + timeline.createNewInstant(instant5); + timeline.createNewInstant(instant6); + timeline.createNewInstant(instant7); + timeline.createNewInstant(instant8); + timeline.setInstants(Stream.of(instant1, instant2, instant3, instant4, instant5, instant6, instant7, instant8).collect(Collectors.toList())); + + assertTrue(timeline.getContiguousCompletedWriteTimeline().lastInstant().isPresent()); + assertEquals(instant4.getTimestamp(), timeline.getContiguousCompletedWriteTimeline().lastInstant().get().getTimestamp()); + // transition both inflight instants to complete + timeline.saveAsComplete(new HoodieInstant(true, instant5.getAction(), instant5.getTimestamp()), Option.empty()); + timeline.saveAsComplete(new HoodieInstant(true, instant8.getAction(), instant8.getTimestamp()), Option.empty()); + timeline = timeline.reload(); + // instant8 in not considered in write timeline, so last completed instant in timeline should be instant7 + assertTrue(timeline.getContiguousCompletedWriteTimeline().lastInstant().isPresent()); + assertEquals(instant7.getTimestamp(), timeline.getContiguousCompletedWriteTimeline().lastInstant().get().getTimestamp()); + } + @Test public void testTimelineGetOperations() { List allInstants = getAllInstants(); @@ -218,20 +258,19 @@ public void testTimelineGetOperations() { // Test that various types of 
getXXX operations from HoodieActiveTimeline // return the correct set of Instant - checkTimeline.accept(timeline.getCommitsTimeline(), - CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION)); - checkTimeline.accept(timeline.getWriteTimeline(), - CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION)); + checkTimeline.accept(timeline.getCommitsTimeline(), CollectionUtils.createSet( + HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION)); + checkTimeline.accept(timeline.getWriteTimeline(), CollectionUtils.createSet( + HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION)); checkTimeline.accept(timeline.getCommitTimeline(), CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION)); checkTimeline.accept(timeline.getDeltaCommitTimeline(), Collections.singleton(HoodieTimeline.DELTA_COMMIT_ACTION)); checkTimeline.accept(timeline.getCleanerTimeline(), Collections.singleton(HoodieTimeline.CLEAN_ACTION)); checkTimeline.accept(timeline.getRollbackTimeline(), Collections.singleton(HoodieTimeline.ROLLBACK_ACTION)); checkTimeline.accept(timeline.getRestoreTimeline(), Collections.singleton(HoodieTimeline.RESTORE_ACTION)); checkTimeline.accept(timeline.getSavePointTimeline(), Collections.singleton(HoodieTimeline.SAVEPOINT_ACTION)); - checkTimeline.accept(timeline.getAllCommitsTimeline(), - CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, - HoodieTimeline.CLEAN_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION, - HoodieTimeline.SAVEPOINT_ACTION, HoodieTimeline.ROLLBACK_ACTION)); + checkTimeline.accept(timeline.getAllCommitsTimeline(), 
CollectionUtils.createSet( + HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.CLEAN_ACTION, HoodieTimeline.COMPACTION_ACTION, + HoodieTimeline.REPLACE_COMMIT_ACTION, HoodieTimeline.SAVEPOINT_ACTION, HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.INDEXING_ACTION)); // Get some random Instants Random rand = new Random(); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java index 8f5e5ae964f83..27dd9df5edd5d 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java @@ -99,15 +99,6 @@ public static String markerFileName(String instantTime, String fileId, IOType io return String.format("%s_%s_%s%s%s.%s", fileId, WRITE_TOKEN, instantTime, fileExtension, HoodieTableMetaClient.MARKER_EXTN, ioType); } - private static void createMetaFile(String basePath, String instantTime, String suffix) throws IOException { - Path parentPath = Paths.get(basePath, HoodieTableMetaClient.METAFOLDER_NAME); - Files.createDirectories(parentPath); - Path metaFilePath = parentPath.resolve(instantTime + suffix); - if (Files.notExists(metaFilePath)) { - Files.createFile(metaFilePath); - } - } - private static void createMetaFile(String basePath, String instantTime, String suffix, FileSystem fs) throws IOException { org.apache.hadoop.fs.Path parentPath = new org.apache.hadoop.fs.Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME); if (!fs.exists(parentPath)) { @@ -119,12 +110,20 @@ private static void createMetaFile(String basePath, String instantTime, String s } } + private static void createMetaFile(String basePath, String instantTime, String suffix) throws IOException { + createMetaFile(basePath, instantTime, suffix, "".getBytes()); + } + private static void createMetaFile(String basePath, String instantTime, String suffix, 
byte[] content) throws IOException { Path parentPath = Paths.get(basePath, HoodieTableMetaClient.METAFOLDER_NAME); Files.createDirectories(parentPath); Path metaFilePath = parentPath.resolve(instantTime + suffix); if (Files.notExists(metaFilePath)) { - Files.write(metaFilePath, content); + if (content.length == 0) { + Files.createFile(metaFilePath); + } else { + Files.write(metaFilePath, content); + } } } @@ -245,6 +244,10 @@ public static void createRequestedRollbackFile(String basePath, String instantTi createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION, serializeRollbackPlan(plan).get()); } + public static void createRequestedRollbackFile(String basePath, String instantTime, byte[] content) throws IOException { + createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION, content); + } + public static void createInflightRollbackFile(String basePath, String instantTime) throws IOException { createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION); } @@ -274,10 +277,14 @@ public static void createInflightCompaction(String basePath, String instantTime) createAuxiliaryMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION); } + public static void createPendingInflightCompaction(String basePath, String instantTime) throws IOException { + createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION); + } + public static void createPartitionMetaFile(String basePath, String partitionPath) throws IOException { Path parentPath = Paths.get(basePath, partitionPath); Files.createDirectories(parentPath); - Path metaFilePath = parentPath.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE); + Path metaFilePath = parentPath.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX); if (Files.notExists(metaFilePath)) { Files.createFile(metaFilePath); } @@ -394,7 +401,7 @@ public static List getPartitionPaths(Path basePath) throws IOException { 
} return Files.list(basePath).filter(entry -> (!entry.getFileName().toString().equals(HoodieTableMetaClient.METAFOLDER_NAME) && !entry.getFileName().toString().contains("parquet") && !entry.getFileName().toString().contains("log")) - && !entry.getFileName().toString().endsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)).collect(Collectors.toList()); + && !entry.getFileName().toString().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX)).collect(Collectors.toList()); } /** diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java index 3e147b7fdd47c..cb4f5570743a6 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java @@ -205,7 +205,7 @@ public static void writePartitionMetadataDeprecated(FileSystem fs, String[] part */ public void writePartitionMetadata(FileSystem fs, String[] partitionPaths, String basePath) { for (String partitionPath : partitionPaths) { - new HoodiePartitionMetadata(fs, "000", new Path(basePath), new Path(basePath, partitionPath)).trySave(0); + new HoodiePartitionMetadata(fs, "000", new Path(basePath), new Path(basePath, partitionPath), Option.empty()).trySave(0); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java index 5f9aab84d0e6d..f0aae0a69d8b3 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java @@ -117,8 +117,12 @@ public class HoodieTestTable { + public static final String PHONY_TABLE_SCHEMA = + "{\"namespace\": \"org.apache.hudi.avro.model\", \"type\": \"record\", \"name\": \"PhonyRecord\", 
\"fields\": []}"; + private static final Logger LOG = LogManager.getLogger(HoodieTestTable.class); private static final Random RANDOM = new Random(); + protected static HoodieTestTableState testTableState; private final List inflightCommits = new ArrayList<>(); @@ -215,7 +219,7 @@ public HoodieCommitMetadata createCommitMetadata(WriteOperationType operationTyp writeStats.addAll(generateHoodieWriteStatForPartitionLogFiles(testTableState.getPartitionToLogFileInfoMap(commitTime), commitTime, bootstrap)); } Map extraMetadata = createImmutableMap("test", "test"); - return buildMetadata(writeStats, partitionToReplaceFileIds, Option.of(extraMetadata), operationType, EMPTY_STRING, action); + return buildMetadata(writeStats, partitionToReplaceFileIds, Option.of(extraMetadata), operationType, PHONY_TABLE_SCHEMA, action); } public HoodieTestTable moveInflightCommitToComplete(String instantTime, HoodieCommitMetadata metadata) throws IOException { @@ -257,6 +261,13 @@ public HoodieTestTable addReplaceCommit( return this; } + public HoodieTestTable addPendingReplace(String instantTime, Option requestedReplaceMetadata, Option inflightReplaceMetadata) throws Exception { + createRequestedReplaceCommit(basePath, instantTime, requestedReplaceMetadata); + createInflightReplaceCommit(basePath, instantTime, inflightReplaceMetadata); + currentInstantTime = instantTime; + return this; + } + public HoodieTestTable addRequestedReplace(String instantTime, Option requestedReplaceMetadata) throws Exception { createRequestedReplaceCommit(basePath, instantTime, requestedReplaceMetadata); currentInstantTime = instantTime; @@ -284,7 +295,7 @@ public HoodieTestTable addClean(String instantTime, HoodieCleanerPlan cleanerPla public HoodieTestTable addClean(String instantTime) throws IOException { HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new HoodieActionInstant(EMPTY_STRING, EMPTY_STRING, EMPTY_STRING), EMPTY_STRING, new HashMap<>(), - CleanPlanV2MigrationHandler.VERSION, new 
HashMap<>()); + CleanPlanV2MigrationHandler.VERSION, new HashMap<>(), new ArrayList<>()); HoodieCleanStat cleanStats = new HoodieCleanStat( HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, HoodieTestUtils.DEFAULT_PARTITION_PATHS[RANDOM.nextInt(HoodieTestUtils.DEFAULT_PARTITION_PATHS.length)], @@ -298,7 +309,7 @@ public HoodieTestTable addClean(String instantTime) throws IOException { public Pair getHoodieCleanMetadata(String commitTime, HoodieTestTableState testTableState) { HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new HoodieActionInstant(commitTime, CLEAN_ACTION, EMPTY_STRING), EMPTY_STRING, new HashMap<>(), - CleanPlanV2MigrationHandler.VERSION, new HashMap<>()); + CleanPlanV2MigrationHandler.VERSION, new HashMap<>(), new ArrayList<>()); List cleanStats = new ArrayList<>(); for (Map.Entry> entry : testTableState.getPartitionToFileIdMapForCleaner(commitTime).entrySet()) { cleanStats.add(new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, @@ -669,7 +680,7 @@ public FileStatus[] listAllFilesInPartition(String partitionPath) throws IOExcep boolean toReturn = true; String filePath = entry.getPath().toString(); String fileName = entry.getPath().getName(); - if (fileName.equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE) || (!fileName.contains("log") && !fileName.contains("parquet")) + if (fileName.startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX) || (!fileName.contains("log") && !fileName.contains("parquet")) || filePath.contains("metadata")) { toReturn = false; } else { @@ -772,7 +783,7 @@ public HoodieReplaceCommitMetadata doCluster(String commitTime, Map { + batches(Collections.emptyList(), -1); + }); + + assertThrows(IllegalArgumentException.class, () -> { + batches(Collections.emptyList(), 0); + }); + + assertEquals(Collections.emptyList(), batches(Collections.emptyList(), 1)); + + List> intsBatches1 = batches(Arrays.asList(1, 2, 3, 4, 5, 6), 3); + assertEquals(2, intsBatches1.size()); + 
assertEquals(Arrays.asList(1, 2, 3), intsBatches1.get(0)); + assertEquals(Arrays.asList(4, 5, 6), intsBatches1.get(1)); + + List> intsBatches2 = batches(Arrays.asList(1, 2, 3, 4, 5, 6), 5); + assertEquals(2, intsBatches2.size()); + assertEquals(Arrays.asList(1, 2, 3, 4, 5), intsBatches2.get(0)); + assertEquals(Collections.singletonList(6), intsBatches2.get(1)); + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestStringUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestStringUtils.java index b402996fa78ae..5f1bcd3c066ef 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestStringUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestStringUtils.java @@ -20,6 +20,9 @@ import org.junit.jupiter.api.Test; +import java.util.ArrayList; +import java.util.Arrays; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNull; @@ -61,4 +64,12 @@ public void testStringNullOrEmpty() { assertNotEquals(null, StringUtils.isNullOrEmpty("this is not empty")); assertTrue(StringUtils.isNullOrEmpty("")); } + + @Test + public void testSplit() { + assertEquals(new ArrayList<>(), StringUtils.split(null, ",")); + assertEquals(new ArrayList<>(), StringUtils.split("", ",")); + assertEquals(Arrays.asList("a", "b", "c"), StringUtils.split("a,b, c", ",")); + assertEquals(Arrays.asList("a", "b", "c"), StringUtils.split("a,b,, c ", ",")); + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java index 056f2121ce52b..eae1cdce8399b 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestTablePathUtils.java @@ -17,6 +17,7 @@ package org.apache.hudi.common.util; +import 
org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -24,9 +25,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; import java.io.File; import java.io.IOException; @@ -41,7 +43,7 @@ public final class TestTablePathUtils { private static final String BASE_FILE_EXTENSION = HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension(); @TempDir - static File tempDir; + public File tempDir; private static FileSystem fs; private static Path tablePath; private static Path partitionPath1; @@ -49,9 +51,12 @@ public final class TestTablePathUtils { private static Path filePath1; private static Path filePath2; - @BeforeAll - static void setup() throws IOException { - URI tablePathURI = Paths.get(tempDir.getAbsolutePath(),"test_table").toUri(); + private void setup() throws IOException { + setup(Option.empty()); + } + + private void setup(Option partitionMetafileFormat) throws IOException { + URI tablePathURI = Paths.get(tempDir.getAbsolutePath(), "test_table").toUri(); tablePath = new Path(tablePathURI); fs = tablePath.getFileSystem(new Configuration()); @@ -69,10 +74,10 @@ static void setup() throws IOException { assertTrue(new File(partitionPathURI2).mkdirs()); HoodiePartitionMetadata partitionMetadata1 = new HoodiePartitionMetadata(fs, Instant.now().toString(), tablePath, - partitionPath1); + partitionPath1, partitionMetafileFormat); partitionMetadata1.trySave(1); HoodiePartitionMetadata partitionMetadata2 = new HoodiePartitionMetadata(fs, Instant.now().toString(), tablePath, - partitionPath2); + 
partitionPath2, partitionMetafileFormat); partitionMetadata2.trySave(2); // Create files @@ -87,12 +92,14 @@ static void setup() throws IOException { @Test void getTablePathFromTablePath() throws IOException { + setup(); Option inferredTablePath = TablePathUtils.getTablePath(fs, tablePath); assertEquals(tablePath, inferredTablePath.get()); } @Test void getTablePathFromMetadataFolderPath() throws IOException { + setup(); Path metaFolder = new Path(tablePath, HoodieTableMetaClient.METAFOLDER_NAME); Option inferredTablePath = TablePathUtils.getTablePath(fs, metaFolder); assertEquals(tablePath, inferredTablePath.get()); @@ -100,6 +107,7 @@ void getTablePathFromMetadataFolderPath() throws IOException { @Test void getTablePathFromMetadataSubFolderPath() throws IOException { + setup(); Path auxFolder = new Path(tablePath, HoodieTableMetaClient.AUXILIARYFOLDER_NAME); assertEquals(tablePath, TablePathUtils.getTablePath(fs, auxFolder).get()); @@ -117,8 +125,10 @@ void getTablePathFromMetadataSubFolderPath() throws IOException { assertEquals(metadataTableFolder, TablePathUtils.getTablePath(fs, metadataTablePartitionFolder).get()); } - @Test - void getTablePathFromPartitionFolderPath() throws IOException { + @ParameterizedTest + @EnumSource(value = HoodieFileFormat.class, names = {"PARQUET", "ORC"}) + void getTablePathFromPartitionFolderPath(HoodieFileFormat partitionMetafileFormat) throws IOException { + setup(Option.of(partitionMetafileFormat)); Option inferredTablePath = TablePathUtils.getTablePath(fs, partitionPath1); assertEquals(tablePath, inferredTablePath.get()); @@ -128,6 +138,7 @@ void getTablePathFromPartitionFolderPath() throws IOException { @Test void getTablePathFromFilePath() throws IOException { + setup(); Option inferredTablePath = TablePathUtils.getTablePath(fs, filePath1); assertEquals(tablePath, inferredTablePath.get()); diff --git a/hudi-common/src/test/java/org/apache/hudi/internal/schema/TestSerDeHelper.java 
b/hudi-common/src/test/java/org/apache/hudi/internal/schema/TestSerDeHelper.java new file mode 100644 index 0000000000000..5a337004812ca --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/internal/schema/TestSerDeHelper.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.internal.schema; + +import org.apache.hudi.internal.schema.utils.InternalSchemaUtils; +import org.apache.hudi.internal.schema.utils.SerDeHelper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Assertions; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.TreeMap; + +public class TestSerDeHelper { + + @Test + public void testComplexSchema2Json() { + InternalSchema internalSchema = new InternalSchema(Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(7, false, "feature1", + Types.BooleanType.get()), Types.Field.get(8, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false, "locations", Types.MapType.get(9, 10, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(11, false, "lat", Types.FloatType.get()), Types.Field.get(12, false, "long", Types.FloatType.get())), false)), + Types.Field.get(4, true, "points", Types.ArrayType.get(13, true, + Types.RecordType.get(Types.Field.get(14, false, "x", Types.LongType.get()), Types.Field.get(15, false, "y", Types.LongType.get())))), + Types.Field.get(5, false,"doubles", Types.ArrayType.get(16, false, Types.DoubleType.get())), + Types.Field.get(6, true, "properties", Types.MapType.get(17, 18, Types.StringType.get(), Types.StringType.get())) + ); + // test schema2json + String result = SerDeHelper.toJson(internalSchema); + InternalSchema convertedSchema = SerDeHelper.fromJson(result).get(); + Assertions.assertEquals(internalSchema, convertedSchema); + // test schemas2json + String results = SerDeHelper.toJson(Arrays.asList(internalSchema)); + TreeMap convertedSchemas = SerDeHelper.parseSchemas(results); + Assertions.assertEquals(1, convertedSchemas.size()); + } + + @Test + public void testPrimitive2Json() { + Types.RecordType record = 
Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "bool", Types.BooleanType.get()), + Types.Field.get(1, "int", Types.IntType.get()), + Types.Field.get(2, "long", Types.LongType.get()), + Types.Field.get(3, "float", Types.FloatType.get()), + Types.Field.get(4, "double", Types.DoubleType.get()), + Types.Field.get(5, "date", Types.DateType.get()), + Types.Field.get(6, "time", Types.TimeType.get()), + Types.Field.get(7, "timestamp", Types.TimestampType.get()), + Types.Field.get(8, "string", Types.StringType.get()), + Types.Field.get(9, "uuid", Types.UUIDType.get()), + Types.Field.get(10, "fixed", Types.FixedType.getFixed(10)), + Types.Field.get(11, "binary", Types.BinaryType.get()), + Types.Field.get(12, "decimal", Types.DecimalType.get(10, 2)) + })); + InternalSchema internalSchema = new InternalSchema(record.fields()); + String result = SerDeHelper.toJson(internalSchema); + InternalSchema convertedSchema = SerDeHelper.fromJson(result).get(); + Assertions.assertEquals(internalSchema, convertedSchema); + } + + @Test + public void testSearchSchema() { + List schemas = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + schemas.add(new InternalSchema(i * 10, + Arrays.asList(Types.Field.get(1, true, "schema" + i * 10, Types.LongType.get())))); + } + + Assertions.assertEquals(InternalSchemaUtils.searchSchema(0, schemas).getRecord().fields().get(0), + Types.Field.get(1, true, "schema" + 0, Types.LongType.get())); + + Assertions.assertEquals(InternalSchemaUtils.searchSchema(9, schemas).getRecord().fields().get(0), + Types.Field.get(1, true, "schema" + 0, Types.LongType.get())); + + Assertions.assertEquals(InternalSchemaUtils.searchSchema(99, schemas).getRecord().fields().get(0), + Types.Field.get(1, true, "schema" + 90, Types.LongType.get())); + + Assertions.assertEquals(InternalSchemaUtils.searchSchema(9999, schemas).getRecord().fields().get(0), + Types.Field.get(1, true, "schema" + 990, Types.LongType.get())); + } + + @Test + public void 
testInheritSchemas() { + List schemas = new ArrayList<>(); + for (int i = 0; i < 2; i++) { + schemas.add(new InternalSchema(i, + Arrays.asList(Types.Field.get(1, true, "schema" + i, Types.LongType.get())))); + } + String oldSchemas = SerDeHelper.toJson(schemas); + InternalSchema newSchema = new InternalSchema(3, + Arrays.asList(Types.Field.get(1, true, "schema" + 3, Types.LongType.get()))); + + String finalResult = SerDeHelper.inheritSchemas(newSchema, oldSchemas); + // convert back + Assertions.assertEquals(SerDeHelper.parseSchemas(finalResult).size(), 3); + } +} + diff --git a/hudi-common/src/test/java/org/apache/hudi/internal/schema/action/TestMergeSchema.java b/hudi-common/src/test/java/org/apache/hudi/internal/schema/action/TestMergeSchema.java new file mode 100644 index 0000000000000..3118e143870e3 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/internal/schema/action/TestMergeSchema.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.internal.schema.action; + +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Types; + +import org.apache.hudi.internal.schema.utils.SchemaChangeUtils; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; + +public class TestMergeSchema { + + @Test + public void testPrimitiveMerge() { + Types.RecordType record = Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "col1", Types.BooleanType.get()), + Types.Field.get(1, "col2", Types.IntType.get()), + Types.Field.get(2, "col3", Types.LongType.get()), + Types.Field.get(3, "col4", Types.FloatType.get())})); + + InternalSchema oldSchema = new InternalSchema(record.fields()); + // add c1 after 'col1', and c2 before 'col3' + TableChanges.ColumnAddChange addChange = TableChanges.ColumnAddChange.get(oldSchema); + addChange.addColumns("c1", Types.BooleanType.get(), "add c1 after col1"); + addChange.addPositionChange("c1", "col1", "after"); + addChange.addColumns("c2", Types.IntType.get(), "add c2 before col3"); + addChange.addPositionChange("c2", "col3", "before"); + InternalSchema newAddSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, addChange); + TableChanges.ColumnDeleteChange deleteChange = TableChanges.ColumnDeleteChange.get(newAddSchema); + deleteChange.deleteColumn("col1"); + deleteChange.deleteColumn("col3"); + InternalSchema newDeleteSchema = SchemaChangeUtils.applyTableChanges2Schema(newAddSchema, deleteChange); + + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(newDeleteSchema); + updateChange.updateColumnType("col2", Types.LongType.get()) + .updateColumnComment("col2", "alter col2 comments") + .renameColumn("col2", "colx").addPositionChange("col2", + "col4", "after"); + InternalSchema updateSchema = SchemaChangeUtils.applyTableChanges2Schema(newDeleteSchema, updateChange); + + // add col1 again + 
TableChanges.ColumnAddChange addChange1 = TableChanges.ColumnAddChange.get(updateSchema); + addChange1.addColumns("col1", Types.BooleanType.get(), "add new col1"); + InternalSchema finalSchema = SchemaChangeUtils.applyTableChanges2Schema(updateSchema, addChange1); + // merge schema by using columnType from query schema + InternalSchema mergeSchema = new InternalSchemaMerger(oldSchema, finalSchema, true, false).mergeSchema(); + + InternalSchema checkedSchema = new InternalSchema(Arrays.asList(new Types.Field[] { + Types.Field.get(4, true, "c1", Types.BooleanType.get(), "add c1 after col1"), + Types.Field.get(5, true, "c2", Types.IntType.get(), "add c2 before col3"), + Types.Field.get(3, true, "col4", Types.FloatType.get()), + Types.Field.get(1, true, "col2", Types.LongType.get(), "alter col2 comments"), + Types.Field.get(6, true, "col1suffix", Types.BooleanType.get(), "add new col1") + })); + Assertions.assertEquals(mergeSchema, checkedSchema); + + // merge schema by using columnType from file schema + InternalSchema mergeSchema1 = new InternalSchemaMerger(oldSchema, finalSchema, true, true).mergeSchema(); + InternalSchema checkedSchema1 = new InternalSchema(Arrays.asList(new Types.Field[] { + Types.Field.get(4, true, "c1", Types.BooleanType.get(), "add c1 after col1"), + Types.Field.get(5, true, "c2", Types.IntType.get(), "add c2 before col3"), + Types.Field.get(3, true, "col4", Types.FloatType.get()), + Types.Field.get(1, true, "col2", Types.IntType.get(), "alter col2 comments"), + Types.Field.get(6, true, "col1suffix", Types.BooleanType.get(), "add new col1") + })); + Assertions.assertEquals(mergeSchema1, checkedSchema1); + } +} + diff --git a/hudi-common/src/test/java/org/apache/hudi/internal/schema/action/TestTableChanges.java b/hudi-common/src/test/java/org/apache/hudi/internal/schema/action/TestTableChanges.java new file mode 100644 index 0000000000000..5bf817f4d8b28 --- /dev/null +++ 
b/hudi-common/src/test/java/org/apache/hudi/internal/schema/action/TestTableChanges.java @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.action; + +import org.apache.hudi.internal.schema.HoodieSchemaException; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Types; + +import org.apache.hudi.internal.schema.utils.SchemaChangeUtils; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Assertions; + +import java.util.Arrays; + +public class TestTableChanges { + + @Test + public void testPrimitiveAdd() { + Types.RecordType record = Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "col1", Types.BooleanType.get()), + Types.Field.get(1, "col2", Types.IntType.get()), + Types.Field.get(2, "col3", Types.LongType.get()), + Types.Field.get(3, "col4", Types.FloatType.get())})); + + Types.RecordType checkRecord = Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "col1", Types.BooleanType.get()), + Types.Field.get(4, true, "c1", Types.BooleanType.get(), "add c1 after col1"), + Types.Field.get(1, "col2", Types.IntType.get()), + Types.Field.get(5, true, 
"c2", Types.IntType.get(), "add c2 before col3"), + Types.Field.get(2, "col3", Types.LongType.get()), + Types.Field.get(3, "col4", Types.FloatType.get())})); + + InternalSchema oldSchema = new InternalSchema(record.fields()); + // add c1 after 'col1', and c2 before 'col3' + TableChanges.ColumnAddChange addChange = TableChanges.ColumnAddChange.get(oldSchema); + addChange.addColumns("c1", Types.BooleanType.get(), "add c1 after col1"); + // check repeated add. + Assertions.assertThrows(HoodieSchemaException.class, () -> addChange.addColumns("c1", Types.BooleanType.get(), "add c1 after col1")); + addChange.addPositionChange("c1", "col1", "after"); + addChange.addColumns("c2", Types.IntType.get(), "add c2 before col3"); + addChange.addPositionChange("c2", "col3", "before"); + InternalSchema newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, addChange); + Assertions.assertEquals(newSchema.getRecord(), checkRecord); + } + + @Test + public void testNestAdd() { + InternalSchema oldSchema = new InternalSchema(Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(7, false, "feature1", + Types.BooleanType.get()), Types.Field.get(8, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false, "locations", Types.MapType.get(9, 10, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(11, false, "lat", Types.FloatType.get()), Types.Field.get(12, false, "long", Types.FloatType.get())), false)), + Types.Field.get(4, true, "points", Types.ArrayType.get(13, true, + Types.RecordType.get(Types.Field.get(14, false, "x", Types.LongType.get()), Types.Field.get(15, false, "y", Types.LongType.get())))), + Types.Field.get(5, false,"doubles", Types.ArrayType.get(16, false, Types.DoubleType.get())), + Types.Field.get(6, true, "properties", Types.MapType.get(17, 18, Types.StringType.get(), Types.StringType.get())) + ); 
+ + TableChanges.ColumnAddChange addChange = TableChanges.ColumnAddChange.get(oldSchema); + // add c1 first + addChange.addColumns("c1", Types.StringType.get(), "add c1 first"); + addChange.addPositionChange("c1", "id", "before"); + //add preferences.cx before preferences.feature2 + addChange.addColumns("preferences", "cx", Types.BooleanType.get(), "add preferences.cx before preferences.feature2"); + // check repeated add. + Assertions.assertThrows(HoodieSchemaException.class, () -> addChange.addColumns("preferences", "cx", Types.BooleanType.get(), "add preferences.cx before preferences.feature2")); + addChange.addPositionChange("preferences.cx", "preferences.feature2", "before"); + // add locations.value.lax before locations.value.long + addChange.addColumns("locations.value", "lax", Types.BooleanType.get(), "add locations.value.lax before locations.value.long"); + addChange.addPositionChange("locations.value.lax", "locations.value.long", "before"); + // + // add points.element.z after points.element.y + addChange.addColumns("points.element", "z", Types.BooleanType.get(), "add points.element.z after points.element.y"); + addChange.addPositionChange("points.element.z", "points.element.y", "after"); + InternalSchema newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, addChange); + InternalSchema checkedSchema = new InternalSchema( + Types.Field.get(19, true, "c1", Types.StringType.get(), "add c1 first"), + Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(7, false, "feature1", Types.BooleanType.get()), + Types.Field.get(20, true, "cx", Types.BooleanType.get(), "add preferences.cx before preferences.feature2"), + Types.Field.get(8, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false, "locations", Types.MapType.get(9, 10, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(11, 
false, "lat", Types.FloatType.get()), + Types.Field.get(21, true, "lax", Types.BooleanType.get(), "add locations.value.lax before locations.value.long"), + Types.Field.get(12, false, "long", Types.FloatType.get())), false)), + Types.Field.get(4, true, "points", Types.ArrayType.get(13, true, + Types.RecordType.get(Types.Field.get(14, false, "x", Types.LongType.get()), + Types.Field.get(15, false, "y", Types.LongType.get()), + Types.Field.get(22, true, "z", Types.BooleanType.get(), "add points.element.z after points.element.y")))), + Types.Field.get(5, false,"doubles", Types.ArrayType.get(16, false, Types.DoubleType.get())), + Types.Field.get(6, true, "properties", Types.MapType.get(17, 18, Types.StringType.get(), Types.StringType.get())) + ); + Assertions.assertEquals(newSchema.getRecord(), checkedSchema.getRecord()); + } + + @Test + public void testPrimitiveDelete() { + Types.RecordType record = Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "col1", Types.BooleanType.get()), + Types.Field.get(1, "col2", Types.IntType.get()), + Types.Field.get(2, "col3", Types.LongType.get()), + Types.Field.get(3, "col4", Types.FloatType.get())})); + InternalSchema oldSchema = new InternalSchema(record.fields()); + TableChanges.ColumnDeleteChange deleteChange = TableChanges.ColumnDeleteChange.get(oldSchema); + deleteChange.deleteColumn("col1"); + // check repeated delete. + // deletechange can handle deleting the same column multiple times, only keep one operation. 
+ deleteChange.deleteColumn("col1"); + deleteChange.deleteColumn("col3"); + InternalSchema newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, deleteChange); + Types.RecordType checkRecord = Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(1, "col2", Types.IntType.get()), + Types.Field.get(3, "col4", Types.FloatType.get())})); + Assertions.assertEquals(newSchema.getRecord(), checkRecord); + } + + @Test + public void testNestDelete() { + InternalSchema oldSchema = new InternalSchema(Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(5, false, "feature1", + Types.BooleanType.get()), Types.Field.get(6, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false, "locations", Types.MapType.get(7, 8, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(9, false, "lat", Types.FloatType.get()), Types.Field.get(10, false, "long", Types.FloatType.get())), false)), + Types.Field.get(4, true, "points", Types.ArrayType.get(11, true, + Types.RecordType.get(Types.Field.get(12, false, "x", Types.LongType.get()), Types.Field.get(13, false, "y", Types.LongType.get())))) + ); + TableChanges.ColumnDeleteChange deleteChange = TableChanges.ColumnDeleteChange.get(oldSchema); + deleteChange.deleteColumn("data"); + deleteChange.deleteColumn("preferences.feature2"); + deleteChange.deleteColumn("preferences.feature2"); + deleteChange.deleteColumn("locations.value.lat"); + deleteChange.deleteColumn("points.element.y"); + InternalSchema newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, deleteChange); + InternalSchema checkedSchema = new InternalSchema(Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(5, false, "feature1", + Types.BooleanType.get()))), + Types.Field.get(3, false, "locations", 
Types.MapType.get(7, 8, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(10, false, "long", Types.FloatType.get())), false)), + Types.Field.get(4, true, "points", Types.ArrayType.get(11, true, + Types.RecordType.get(Types.Field.get(12, false, "x", Types.LongType.get())))) + ); + Assertions.assertEquals(newSchema.getRecord(), checkedSchema.getRecord()); + } + + @Test + public void testPrimitiveUpdate() { + Types.RecordType record = Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "col1", Types.BooleanType.get()), + Types.Field.get(1, "col2", Types.IntType.get()), + Types.Field.get(2, "col3", Types.LongType.get()), + Types.Field.get(3, "col4", Types.FloatType.get())})); + InternalSchema oldSchema = new InternalSchema(record.fields()); + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(oldSchema); + updateChange.updateColumnType("col2", Types.LongType.get()) + .updateColumnComment("col2", "alter col2 comments") + .renameColumn("col2", "colx").addPositionChange("col2", "col4", "after"); + InternalSchema newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, updateChange); + Types.RecordType checkedRecord = Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "col1", Types.BooleanType.get()), + Types.Field.get(2, "col3", Types.LongType.get()), + Types.Field.get(3, "col4", Types.FloatType.get()), + Types.Field.get(1, true, "colx", Types.LongType.get(), "alter col2 comments")})); + Assertions.assertEquals(newSchema.getRecord(), checkedRecord); + } + + @Test + public void testNestUpdate() { + InternalSchema oldSchema = new InternalSchema(Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(5, false, "feature1", + Types.BooleanType.get()), Types.Field.get(6, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, 
false, "locations", Types.MapType.get(7, 8, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(9, false, "lat", Types.FloatType.get()), Types.Field.get(10, false, "long", Types.FloatType.get())), false)), + Types.Field.get(4, true, "points", Types.ArrayType.get(11, true, + Types.RecordType.get(Types.Field.get(12, false, "x", Types.LongType.get()), Types.Field.get(13, false, "y", Types.LongType.get())))) + ); + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(oldSchema); + updateChange + .updateColumnNullability("id", true) + .renameColumn("id", "idx") + .addPositionChange("data", "points", "after"); + updateChange + .updateColumnComment("preferences.feature1", "add feature1 comment") + .renameColumn("preferences.feature1", "f1") + .addPositionChange("preferences.feature1", "preferences.feature1", "first"); + updateChange.updateColumnComment("locations.value.lat", "add lat comment") + .renameColumn("locations.value.lat", "lax") + .addPositionChange("locations.value.lat", "locations.value.lat", "first"); + updateChange.renameColumn("points.element.x", "z") + .addPositionChange("points.element.x", "points.element.y", "after"); + InternalSchema newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, updateChange); + InternalSchema checkSchema = new InternalSchema(Types.Field.get(0, true, "idx", Types.IntType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(5, false, "f1", + Types.BooleanType.get(), "add feature1 comment"), Types.Field.get(6, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false, "locations", Types.MapType.get(7, 8, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(9, false, "lax", Types.FloatType.get(), "add lat comment"), Types.Field.get(10, false, "long", Types.FloatType.get())), false)), + Types.Field.get(4, true, "points", Types.ArrayType.get(11, true, + Types.RecordType.get(Types.Field.get(13, false, "y", 
Types.LongType.get()), Types.Field.get(12, false, "z", Types.LongType.get())))), + Types.Field.get(1, true, "data", Types.StringType.get()) + ); + Assertions.assertEquals(newSchema.getRecord(), checkSchema.getRecord()); + } +} + diff --git a/hudi-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java b/hudi-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java new file mode 100644 index 0000000000000..9db05b31e3e80 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.internal.schema.io; + +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.Types; +import org.apache.hudi.internal.schema.utils.SerDeHelper; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests {@link FileBasedInternalSchemaStorageManager}. + */ +public class TestFileBasedInternalSchemaStorageManager extends HoodieCommonTestHarness { + private HoodieActiveTimeline timeline; + + @BeforeEach + public void setUp() throws Exception { + initMetaClient(); + } + + @Test + public void testPersistAndReadHistorySchemaStr() throws IOException { + timeline = new HoodieActiveTimeline(metaClient); + FileBasedInternalSchemaStorageManager fm = new FileBasedInternalSchemaStorageManager(metaClient); + InternalSchema currentSchema = getSimpleSchema(); + currentSchema.setSchemaId(0L); + // save first schema. + fm.persistHistorySchemaStr("0000", SerDeHelper.inheritSchemas(currentSchema, "")); + // Simulate commit. + simulateCommit("0000"); + metaClient.reloadActiveTimeline(); + // try to read schema + InternalSchema readSchema = fm.getSchemaByKey("0").get(); + assertEquals(currentSchema, readSchema); + // save history schema again + InternalSchema secondSchema = getSimpleSchema(); + secondSchema.setSchemaId(1L); + fm.persistHistorySchemaStr("0001", SerDeHelper.inheritSchemas(secondSchema, fm.getHistorySchemaStr())); + // Simulate commit. 
+ simulateCommit("0001"); + metaClient.reloadActiveTimeline(); + // try to read schema + assertEquals(secondSchema, fm.getSchemaByKey("1").get()); + + // test write failed and residual file clean. + InternalSchema thirdSchema = getSimpleSchema(); + thirdSchema.setSchemaId(2L); + fm.persistHistorySchemaStr("0002", SerDeHelper.inheritSchemas(thirdSchema, fm.getHistorySchemaStr())); + // do not simulate commit "0002", so current save file will be residual files. + // try 4st persist + InternalSchema lastSchema = getSimpleSchema(); + lastSchema.setSchemaId(3L); + fm.persistHistorySchemaStr("0004", SerDeHelper.inheritSchemas(lastSchema, fm.getHistorySchemaStr())); + simulateCommit("0004"); + metaClient.reloadActiveTimeline(); + // now the residual file created by 3st persist should be removed. + File f = new File(metaClient.getSchemaFolderName() + File.separator + "0002.schemacommit"); + assertTrue(!f.exists()); + assertEquals(lastSchema, fm.getSchemaByKey("3").get()); + } + + private void simulateCommit(String commitTime) { + if (timeline == null) { + timeline = new HoodieActiveTimeline(metaClient); + } + HoodieInstant instant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, commitTime); + timeline.createNewInstant(instant); + timeline.transitionRequestedToInflight(instant, Option.empty()); + timeline.saveAsComplete(new HoodieInstant(true, instant.getAction(), instant.getTimestamp()), + Option.empty()); + } + + private InternalSchema getSimpleSchema() { + Types.RecordType record = Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "bool", Types.BooleanType.get()), + Types.Field.get(1, "int", Types.IntType.get()), + })); + return new InternalSchema(record.fields()); + } +} + diff --git a/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestAvroSchemaEvolutionUtils.java b/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestAvroSchemaEvolutionUtils.java new file mode 100644 index 
0000000000000..d116697b8dc4a --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestAvroSchemaEvolutionUtils.java @@ -0,0 +1,422 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal.schema.utils; + +import org.apache.avro.JsonProperties; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.InternalSchemaBuilder; +import org.apache.hudi.internal.schema.Type; +import org.apache.hudi.internal.schema.Types; +import org.apache.hudi.internal.schema.action.TableChanges; +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Assertions; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +public 
class TestAvroSchemaEvolutionUtils { + + @Test + public void testPrimitiveTypes() { + Schema[] avroPrimitives = new Schema[] { + Schema.create(Schema.Type.BOOLEAN), + Schema.create(Schema.Type.INT), + Schema.create(Schema.Type.LONG), + Schema.create(Schema.Type.FLOAT), + Schema.create(Schema.Type.DOUBLE), + LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)), + LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG)), + LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)), + Schema.create(Schema.Type.STRING), + LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16)), + Schema.createFixed("fixed_12", null, null, 12), + Schema.create(Schema.Type.BYTES), + LogicalTypes.decimal(9, 4).addToSchema(Schema.createFixed("decimal_9_4", null, null, 4))}; + + Type[] primitiveTypes = new Type[] { + Types.BooleanType.get(), + Types.IntType.get(), + Types.LongType.get(), + Types.FloatType.get(), + Types.DoubleType.get(), + Types.DateType.get(), + Types.TimeType.get(), + Types.TimestampType.get(), + Types.StringType.get(), + Types.UUIDType.get(), + Types.FixedType.getFixed(12), + Types.BinaryType.get(), + Types.DecimalType.get(9, 4) + }; + + for (int i = 0; i < primitiveTypes.length; i++) { + Type convertPrimitiveResult = AvroInternalSchemaConverter.convertToField(avroPrimitives[i]); + Assertions.assertEquals(convertPrimitiveResult, primitiveTypes[i]); + Schema convertResult = AvroInternalSchemaConverter.convert(primitiveTypes[i], "t1"); + Assertions.assertEquals(convertResult, avroPrimitives[i]); + } + } + + @Test + public void testRecordAndPrimitiveTypes() { + Types.RecordType record = Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "bool", Types.BooleanType.get()), + Types.Field.get(1, "int", Types.IntType.get()), + Types.Field.get(2, "long", Types.LongType.get()), + Types.Field.get(3, "float", Types.FloatType.get()), + Types.Field.get(4, "double", Types.DoubleType.get()), + 
Types.Field.get(5, "date", Types.DateType.get()), + Types.Field.get(6, "time", Types.TimeType.get()), + Types.Field.get(7, "timestamp", Types.TimestampType.get()), + Types.Field.get(8, "string", Types.StringType.get()), + Types.Field.get(9, "uuid", Types.UUIDType.get()), + Types.Field.get(10, "fixed", Types.FixedType.getFixed(10)), + Types.Field.get(11, "binary", Types.BinaryType.get()), + Types.Field.get(12, "decimal", Types.DecimalType.get(10, 2)) + })); + + Schema schema = create("t1", + new Schema.Field("bool", AvroInternalSchemaConverter.nullableSchema(Schema.create(Schema.Type.BOOLEAN)), null, JsonProperties.NULL_VALUE), + new Schema.Field("int", AvroInternalSchemaConverter.nullableSchema(Schema.create(Schema.Type.INT)), null, JsonProperties.NULL_VALUE), + new Schema.Field("long", AvroInternalSchemaConverter.nullableSchema(Schema.create(Schema.Type.LONG)), null, JsonProperties.NULL_VALUE), + new Schema.Field("float", AvroInternalSchemaConverter.nullableSchema(Schema.create(Schema.Type.FLOAT)), null, JsonProperties.NULL_VALUE), + new Schema.Field("double", AvroInternalSchemaConverter.nullableSchema(Schema.create(Schema.Type.DOUBLE)), null, JsonProperties.NULL_VALUE), + new Schema.Field("date", AvroInternalSchemaConverter.nullableSchema(LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT))), null, JsonProperties.NULL_VALUE), + new Schema.Field("time", AvroInternalSchemaConverter.nullableSchema(LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG))), null, JsonProperties.NULL_VALUE), + new Schema.Field("timestamp", AvroInternalSchemaConverter.nullableSchema(LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG))), null, JsonProperties.NULL_VALUE), + new Schema.Field("string", AvroInternalSchemaConverter.nullableSchema(Schema.create(Schema.Type.STRING)), null, JsonProperties.NULL_VALUE), + new Schema.Field("uuid", 
AvroInternalSchemaConverter.nullableSchema(LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16))), null, JsonProperties.NULL_VALUE), + new Schema.Field("fixed", AvroInternalSchemaConverter.nullableSchema(Schema.createFixed("fixed_10", null, null, 10)), null, JsonProperties.NULL_VALUE), + new Schema.Field("binary", AvroInternalSchemaConverter.nullableSchema(Schema.create(Schema.Type.BYTES)), null, JsonProperties.NULL_VALUE), + new Schema.Field("decimal", AvroInternalSchemaConverter.nullableSchema(LogicalTypes.decimal(10, 2) + .addToSchema(Schema.createFixed("decimal_10_2", null, null, 5))), null, JsonProperties.NULL_VALUE)); + Schema convertedSchema = AvroInternalSchemaConverter.convert(record, "t1"); + Assertions.assertEquals(convertedSchema, schema); + Types.RecordType convertedRecord = AvroInternalSchemaConverter.convert(schema).getRecord(); + Assertions.assertEquals(convertedRecord, record); + } + + private Schema create(String name, Schema.Field... fields) { + return Schema.createRecord(name, null, null, false, Arrays.asList(fields)); + } + + @Test + public void testArrayType() { + Type arrayNestRecordType = Types.ArrayType.get(1, false, + Types.RecordType.get(Arrays.asList(Types.Field.get(2, false, "a", Types.FloatType.get()), + Types.Field.get(3, false, "b", Types.FloatType.get())))); + + Schema schema = SchemaBuilder.array().items(create("t1", + new Schema.Field("a", Schema.create(Schema.Type.FLOAT), null, null), + new Schema.Field("b", Schema.create(Schema.Type.FLOAT), null, null))); + Schema convertedSchema = AvroInternalSchemaConverter.convert(arrayNestRecordType, "t1"); + Assertions.assertEquals(convertedSchema, schema); + Types.ArrayType convertedRecord = (Types.ArrayType) AvroInternalSchemaConverter.convertToField(schema); + Assertions.assertEquals(convertedRecord, arrayNestRecordType); + } + + @Test + public void testComplexConvert() { + String schemaStr = 
"{\"type\":\"record\",\"name\":\"newTableName\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"data\"," + + "\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"preferences\",\"type\":[\"null\"," + + "{\"type\":\"record\",\"name\":\"newTableName_preferences\",\"fields\":[{\"name\":\"feature1\"," + + "\"type\":\"boolean\"},{\"name\":\"feature2\",\"type\":[\"null\",\"boolean\"],\"default\":null}]}]," + + "\"default\":null},{\"name\":\"locations\",\"type\":{\"type\":\"map\",\"values\":{\"type\":\"record\"," + + "\"name\":\"newTableName_locations\",\"fields\":[{\"name\":\"lat\",\"type\":\"float\"},{\"name\":\"long\"," + + "\"type\":\"float\"}]}}},{\"name\":\"points\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[\"null\"," + + "{\"type\":\"record\",\"name\":\"newTableName_points\",\"fields\":[{\"name\":\"x\",\"type\":\"long\"}," + + "{\"name\":\"y\",\"type\":\"long\"}]}]}],\"default\":null},{\"name\":\"doubles\",\"type\":{\"type\":\"array\",\"items\":\"double\"}}," + + "{\"name\":\"properties\",\"type\":[\"null\",{\"type\":\"map\",\"values\":[\"null\",\"string\"]}],\"default\":null}]}"; + Schema schema = new Schema.Parser().parse(schemaStr); + + InternalSchema internalSchema = new InternalSchema(Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(7, false, "feature1", + Types.BooleanType.get()), Types.Field.get(8, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false, "locations", Types.MapType.get(9, 10, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(11, false, "lat", Types.FloatType.get()), Types.Field.get(12, false, "long", Types.FloatType.get())), false)), + Types.Field.get(4, true, "points", Types.ArrayType.get(13, true, + Types.RecordType.get(Types.Field.get(14, false, "x", Types.LongType.get()), Types.Field.get(15, false, "y", Types.LongType.get())))), + 
Types.Field.get(5, false,"doubles", Types.ArrayType.get(16, false, Types.DoubleType.get())), + Types.Field.get(6, true, "properties", Types.MapType.get(17, 18, Types.StringType.get(), Types.StringType.get())) + ); + + Type convertRecord = AvroInternalSchemaConverter.convert(schema).getRecord(); + Assertions.assertEquals(convertRecord, internalSchema.getRecord()); + Assertions.assertEquals(schema, AvroInternalSchemaConverter.convert(internalSchema, "newTableName")); + } + + @Test + public void testRefreshNewId() { + Types.RecordType record = Types.RecordType.get(Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(4, false, "feature1", + Types.BooleanType.get()), Types.Field.get(5, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false, "locations", Types.MapType.get(6, 7, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(8, false, "lat", Types.FloatType.get()), Types.Field.get(9, false, "long", Types.FloatType.get())), false)) + ); + AtomicInteger newId = new AtomicInteger(100); + Types.RecordType recordWithNewId = (Types.RecordType) InternalSchemaBuilder.getBuilder().refreshNewId(record, newId); + + Types.RecordType newRecord = Types.RecordType.get(Types.Field.get(100, false, "id", Types.IntType.get()), + Types.Field.get(101, true, "data", Types.StringType.get()), + Types.Field.get(102, true, "preferences", + Types.RecordType.get(Types.Field.get(104, false, "feature1", + Types.BooleanType.get()), Types.Field.get(105, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(103, false, "locations", Types.MapType.get(106, 107, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(108, false, "lat", Types.FloatType.get()), Types.Field.get(109, false, "long", Types.FloatType.get())), false)) + ); + Assertions.assertEquals(newRecord, recordWithNewId); + } + + /** + * test record data 
type changes. + * int => long/float/double/string + * long => float/double/string + * float => double/String + * double => String/Decimal + * Decimal => Decimal/String + * String => date/decimal + * date => String + */ + @Test + public void testReWriteRecordWithTypeChanged() { + Schema avroSchema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"h0_record\",\"namespace\":\"hoodie.h0\",\"fields\"" + + ":[{\"name\":\"id\",\"type\":[\"null\",\"int\"],\"default\":null}," + + "{\"name\":\"comb\",\"type\":[\"null\",\"int\"],\"default\":null}," + + "{\"name\":\"com1\",\"type\":[\"null\",\"int\"],\"default\":null}," + + "{\"name\":\"col0\",\"type\":[\"null\",\"int\"],\"default\":null}," + + "{\"name\":\"col1\",\"type\":[\"null\",\"long\"],\"default\":null}," + + "{\"name\":\"col11\",\"type\":[\"null\",\"long\"],\"default\":null}," + + "{\"name\":\"col12\",\"type\":[\"null\",\"long\"],\"default\":null}," + + "{\"name\":\"col2\",\"type\":[\"null\",\"float\"],\"default\":null}," + + "{\"name\":\"col21\",\"type\":[\"null\",\"float\"],\"default\":null}," + + "{\"name\":\"col3\",\"type\":[\"null\",\"double\"],\"default\":null}," + + "{\"name\":\"col31\",\"type\":[\"null\",\"double\"],\"default\":null}," + + "{\"name\":\"col4\",\"type\":[\"null\",{\"type\":\"fixed\",\"name\":\"fixed\",\"namespace\":\"hoodie.h0.h0_record.col4\"," + + "\"size\":5,\"logicalType\":\"decimal\",\"precision\":10,\"scale\":4}],\"default\":null}," + + "{\"name\":\"col41\",\"type\":[\"null\",{\"type\":\"fixed\",\"name\":\"fixed\",\"namespace\":\"hoodie.h0.h0_record.col41\"," + + "\"size\":5,\"logicalType\":\"decimal\",\"precision\":10,\"scale\":4}],\"default\":null}," + + "{\"name\":\"col5\",\"type\":[\"null\",\"string\"],\"default\":null}," + + "{\"name\":\"col51\",\"type\":[\"null\",\"string\"],\"default\":null}," + + "{\"name\":\"col6\",\"type\":[\"null\",{\"type\":\"int\",\"logicalType\":\"date\"}],\"default\":null}," + + 
"{\"name\":\"col7\",\"type\":[\"null\",{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}],\"default\":null}," + + "{\"name\":\"col8\",\"type\":[\"null\",\"boolean\"],\"default\":null}," + + "{\"name\":\"col9\",\"type\":[\"null\",\"bytes\"],\"default\":null},{\"name\":\"par\",\"type\":[\"null\",{\"type\":\"int\",\"logicalType\":\"date\"}],\"default\":null}]}"); + // create a test record with avroSchema + GenericData.Record avroRecord = new GenericData.Record(avroSchema); + avroRecord.put("id", 1); + avroRecord.put("comb", 100); + avroRecord.put("com1", -100); + avroRecord.put("col0", 256); + avroRecord.put("col1", 1000L); + avroRecord.put("col11", -100L); + avroRecord.put("col12", 2000L); + avroRecord.put("col2", -5.001f); + avroRecord.put("col21", 5.001f); + avroRecord.put("col3", 12.999d); + avroRecord.put("col31", 9999.999d); + Schema currentDecimalType = avroSchema.getField("col4").schema().getTypes().get(1); + BigDecimal bd = new BigDecimal("123.456").setScale(((LogicalTypes.Decimal) currentDecimalType.getLogicalType()).getScale()); + avroRecord.put("col4", HoodieAvroUtils.DECIMAL_CONVERSION.toFixed(bd, currentDecimalType, currentDecimalType.getLogicalType())); + Schema currentDecimalType1 = avroSchema.getField("col41").schema().getTypes().get(1); + BigDecimal bd1 = new BigDecimal("7890.456").setScale(((LogicalTypes.Decimal) currentDecimalType1.getLogicalType()).getScale()); + avroRecord.put("col41", HoodieAvroUtils.DECIMAL_CONVERSION.toFixed(bd1, currentDecimalType1, currentDecimalType1.getLogicalType())); + + avroRecord.put("col5", "2011-01-01"); + avroRecord.put("col51", "199.342"); + avroRecord.put("col6", 18987); + avroRecord.put("col7", 1640491505000000L); + avroRecord.put("col8", false); + ByteBuffer bb = ByteBuffer.wrap(new byte[] {97, 48, 53}); + avroRecord.put("col9", bb); + Assertions.assertEquals(GenericData.get().validate(avroSchema, avroRecord), true); + InternalSchema internalSchema = AvroInternalSchemaConverter.convert(avroSchema); + // 
do change type operation + TableChanges.ColumnUpdateChange updateChange = TableChanges.ColumnUpdateChange.get(internalSchema); + updateChange + .updateColumnType("id", Types.LongType.get()) + .updateColumnType("comb", Types.FloatType.get()) + .updateColumnType("com1", Types.DoubleType.get()) + .updateColumnType("col0", Types.StringType.get()) + .updateColumnType("col1", Types.FloatType.get()) + .updateColumnType("col11", Types.DoubleType.get()) + .updateColumnType("col12", Types.StringType.get()) + .updateColumnType("col2", Types.DoubleType.get()) + .updateColumnType("col21", Types.StringType.get()) + .updateColumnType("col3", Types.StringType.get()) + .updateColumnType("col31", Types.DecimalType.get(18, 9)) + .updateColumnType("col4", Types.DecimalType.get(18, 9)) + .updateColumnType("col41", Types.StringType.get()) + .updateColumnType("col5", Types.DateType.get()) + .updateColumnType("col51", Types.DecimalType.get(18, 9)) + .updateColumnType("col6", Types.StringType.get()); + InternalSchema newSchema = SchemaChangeUtils.applyTableChanges2Schema(internalSchema, updateChange); + Schema newAvroSchema = AvroInternalSchemaConverter.convert(newSchema, avroSchema.getName()); + GenericRecord newRecord = HoodieAvroUtils.rewriteRecordWithNewSchema(avroRecord, newAvroSchema); + + Assertions.assertEquals(GenericData.get().validate(newAvroSchema, newRecord), true); + } + + @Test + public void testReWriteNestRecord() { + Types.RecordType record = Types.RecordType.get(Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(5, false, "feature1", + Types.BooleanType.get()), Types.Field.get(6, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false,"doubles", Types.ArrayType.get(7, false, Types.DoubleType.get())), + Types.Field.get(4, false, "locations", Types.MapType.get(8, 9, Types.StringType.get(), + 
Types.RecordType.get(Types.Field.get(10, false, "lat", Types.FloatType.get()), Types.Field.get(11, false, "long", Types.FloatType.get())), false)) + ); + Schema schema = AvroInternalSchemaConverter.convert(record, "test1"); + GenericData.Record avroRecord = new GenericData.Record(schema); + GenericData.get().validate(schema, avroRecord); + avroRecord.put("id", 2); + avroRecord.put("data", "xs"); + // fill record type + GenericData.Record preferencesRecord = new GenericData.Record(AvroInternalSchemaConverter.convert(record.fieldType("preferences"), "test1_preferences")); + preferencesRecord.put("feature1", false); + preferencesRecord.put("feature2", true); + Assertions.assertEquals(GenericData.get().validate(AvroInternalSchemaConverter.convert(record.fieldType("preferences"), "test1_preferences"), preferencesRecord), true); + avroRecord.put("preferences", preferencesRecord); + // fill mapType + Map locations = new HashMap<>(); + Schema mapSchema = AvroInternalSchemaConverter.convert(((Types.MapType)record.field("locations").type()).valueType(), "test1_locations"); + GenericData.Record locationsValue = new GenericData.Record(mapSchema); + locationsValue.put("lat", 1.2f); + locationsValue.put("long", 1.4f); + GenericData.Record locationsValue1 = new GenericData.Record(mapSchema); + locationsValue1.put("lat", 2.2f); + locationsValue1.put("long", 2.4f); + locations.put("key1", locationsValue); + locations.put("key2", locationsValue1); + avroRecord.put("locations", locations); + + List doubles = new ArrayList<>(); + doubles.add(2.0d); + doubles.add(3.0d); + avroRecord.put("doubles", doubles); + + // do check + Assertions.assertEquals(GenericData.get().validate(schema, avroRecord), true); + // create newSchema + Types.RecordType newRecord = Types.RecordType.get( + Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get( + Types.Field.get(5, false, 
"feature1", Types.BooleanType.get()), + Types.Field.get(5, true, "featurex", Types.BooleanType.get()), + Types.Field.get(6, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false,"doubles", Types.ArrayType.get(7, false, Types.DoubleType.get())), + Types.Field.get(4, false, "locations", Types.MapType.get(8, 9, Types.StringType.get(), + Types.RecordType.get( + Types.Field.get(10, true, "laty", Types.FloatType.get()), + Types.Field.get(11, false, "long", Types.FloatType.get())), false) + ) + ); + + Schema newAvroSchema = AvroInternalSchemaConverter.convert(newRecord, schema.getName()); + GenericRecord newAvroRecord = HoodieAvroUtils.rewriteRecordWithNewSchema(avroRecord, newAvroSchema); + // test the correctly of rewrite + Assertions.assertEquals(GenericData.get().validate(newAvroSchema, newAvroRecord), true); + } + + @Test + public void testEvolutionSchemaFromNewAvroSchema() { + Types.RecordType oldRecord = Types.RecordType.get( + Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get( + Types.Field.get(5, false, "feature1", Types.BooleanType.get()), + Types.Field.get(6, true, "featurex", Types.BooleanType.get()), + Types.Field.get(7, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false,"doubles", Types.ArrayType.get(8, false, Types.DoubleType.get())), + Types.Field.get(4, false, "locations", Types.MapType.get(9, 10, Types.StringType.get(), + Types.RecordType.get( + Types.Field.get(11, false, "laty", Types.FloatType.get()), + Types.Field.get(12, false, "long", Types.FloatType.get())), false) + ) + ); + InternalSchema oldSchema = new InternalSchema(oldRecord.fields()); + Types.RecordType evolvedRecord = Types.RecordType.get( + Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get( + 
Types.Field.get(5, false, "feature1", Types.BooleanType.get()), + Types.Field.get(5, true, "featurex", Types.BooleanType.get()), + Types.Field.get(6, true, "feature2", Types.BooleanType.get()), + Types.Field.get(5, true, "feature3", Types.BooleanType.get()))), + Types.Field.get(3, false,"doubles", Types.ArrayType.get(7, false, Types.DoubleType.get())), + Types.Field.get(4, false, "locations", Types.MapType.get(8, 9, Types.StringType.get(), + Types.RecordType.get( + Types.Field.get(10, false, "laty", Types.FloatType.get()), + Types.Field.get(11, false, "long", Types.FloatType.get())), false) + ), + Types.Field.get(0, false, "add1", Types.IntType.get()), + Types.Field.get(2, true, "addStruct", + Types.RecordType.get( + Types.Field.get(5, false, "nest1", Types.BooleanType.get()), + Types.Field.get(5, true, "nest2", Types.BooleanType.get()))) + ); + evolvedRecord = (Types.RecordType)InternalSchemaBuilder.getBuilder().refreshNewId(evolvedRecord, new AtomicInteger(0)); + Schema evolvedAvroSchema = AvroInternalSchemaConverter.convert(evolvedRecord, "test1"); + InternalSchema result = AvroSchemaEvolutionUtils.evolveSchemaFromNewAvroSchema(evolvedAvroSchema, oldSchema); + Types.RecordType checkedRecord = Types.RecordType.get( + Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get( + Types.Field.get(5, false, "feature1", Types.BooleanType.get()), + Types.Field.get(6, true, "featurex", Types.BooleanType.get()), + Types.Field.get(7, true, "feature2", Types.BooleanType.get()), + Types.Field.get(17, true, "feature3", Types.BooleanType.get()))), + Types.Field.get(3, false,"doubles", Types.ArrayType.get(8, false, Types.DoubleType.get())), + Types.Field.get(4, false, "locations", Types.MapType.get(9, 10, Types.StringType.get(), + Types.RecordType.get( + Types.Field.get(11, false, "laty", Types.FloatType.get()), + Types.Field.get(12, false, "long", 
Types.FloatType.get())), false) + ), + Types.Field.get(13, true, "add1", Types.IntType.get()), + Types.Field.get(14, true, "addStruct", + Types.RecordType.get( + Types.Field.get(15, false, "nest1", Types.BooleanType.get()), + Types.Field.get(16, true, "nest2", Types.BooleanType.get()))) + ); + Assertions.assertEquals(result.getRecord(), checkedRecord); + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestInternalSchemaUtils.java b/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestInternalSchemaUtils.java new file mode 100644 index 0000000000000..a940a4f1451b9 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestInternalSchemaUtils.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.internal.schema.utils; + +import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.internal.schema.InternalSchemaBuilder; +import org.apache.hudi.internal.schema.Types; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Assertions; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class TestInternalSchemaUtils { + @Test + public void testPruneSchema() { + Types.RecordType record = getSimpleRecordType(); + InternalSchema originSchema = new InternalSchema(record.fields()); + List prunedCols = new ArrayList<>(); + prunedCols.add(4); + prunedCols.add(3); + prunedCols.add(0); + prunedCols.add(2); + InternalSchema prunedSchema = InternalSchemaUtils.pruneInternalSchemaByID(originSchema, prunedCols, null); + InternalSchema checkedSchema = new InternalSchema(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "bool", Types.BooleanType.get()), + Types.Field.get(2, "long", Types.LongType.get()), + Types.Field.get(3, "float", Types.FloatType.get()), + Types.Field.get(4, "double", Types.DoubleType.get()) + })); + Assertions.assertEquals(prunedSchema, checkedSchema); + + // nest schema + Types.RecordType nestRecord = getNestRecordType(); + InternalSchema originNestSchema = new InternalSchema(nestRecord.fields()); + List prunedNestCols = new ArrayList<>(); + prunedNestCols.add(0); + prunedNestCols.add(1); + prunedNestCols.add(5); + prunedNestCols.add(11); + InternalSchema prunedNestSchema = InternalSchemaUtils.pruneInternalSchemaByID(originNestSchema, prunedNestCols, null); + } + + @Test + public void testInternalSchemaVisitor() { + Types.RecordType nestRecord = getNestRecordType(); + Map result = InternalSchemaBuilder.getBuilder().buildNameToId(nestRecord); + Assertions.assertEquals(result.size(), 12); + Assertions.assertEquals(result.get("locations.value.long"), 11); + Assertions.assertEquals(result.get("locations.value.lat"), 10); + 
Assertions.assertEquals(result.get("locations.value"), 9); + Assertions.assertEquals(result.get("locations.key"), 8); + Assertions.assertEquals(result.get("doubles.element"), 7); + + Types.RecordType simpleRecord = getSimpleRecordType(); + Map result1 = InternalSchemaBuilder.getBuilder().buildNameToId(simpleRecord); + Assertions.assertEquals(result1.size(), 5); + Assertions.assertEquals(result1.get("double"), 4); + } + + public Types.RecordType getNestRecordType() { + return Types.RecordType.get(Types.Field.get(0, false, "id", Types.IntType.get()), + Types.Field.get(1, true, "data", Types.StringType.get()), + Types.Field.get(2, true, "preferences", + Types.RecordType.get(Types.Field.get(5, false, "feature1", + Types.BooleanType.get()), Types.Field.get(6, true, "feature2", Types.BooleanType.get()))), + Types.Field.get(3, false,"doubles", Types.ArrayType.get(7, false, Types.DoubleType.get())), + Types.Field.get(4, false, "locations", Types.MapType.get(8, 9, Types.StringType.get(), + Types.RecordType.get(Types.Field.get(10, false, "lat", Types.FloatType.get()), Types.Field.get(11, false, "long", Types.FloatType.get())), false)) + ); + } + + public Types.RecordType getSimpleRecordType() { + return Types.RecordType.get(Arrays.asList(new Types.Field[] { + Types.Field.get(0, "bool", Types.BooleanType.get()), + Types.Field.get(1, "int", Types.IntType.get()), + Types.Field.get(2, "long", Types.LongType.get()), + Types.Field.get(3, "float", Types.FloatType.get()), + Types.Field.get(4, "double", Types.DoubleType.get()) + })); + } +} diff --git a/hudi-examples/bin/hudi-delta-streamer b/hudi-examples/bin/hudi-delta-streamer index 9accd7174ae4b..a1e9ee18804f0 100755 --- a/hudi-examples/bin/hudi-delta-streamer +++ b/hudi-examples/bin/hudi-delta-streamer @@ -32,7 +32,6 @@ exec "${SPARK_HOME}"/bin/spark-submit \ --conf spark.kryoserializer.buffer.max=128m \ --conf spark.yarn.queue=root.default \ --conf spark.yarn.submit.waitAppCompletion=false \ ---packages 
org.apache.spark:spark-avro_2.11:2.4.4 \ --jars ${EXAMPLES_JARS} \ --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \ "${JAR_FILE}" \ diff --git a/hudi-examples/hudi-examples-common/pom.xml b/hudi-examples/hudi-examples-common/pom.xml new file mode 100644 index 0000000000000..4a99d975dd571 --- /dev/null +++ b/hudi-examples/hudi-examples-common/pom.xml @@ -0,0 +1,109 @@ + + + + + hudi-examples + org.apache.hudi + 0.12.0-SNAPSHOT + + 4.0.0 + + hudi-examples-common + + + ${project.parent.basedir} + true + + + + + + src/main/resources + + + + + + net.alchim31.maven + scala-maven-plugin + + + scala-compile-first + process-resources + + add-source + compile + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + compile + + compile + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + test-compile + + + + false + + + + org.apache.rat + apache-rat-plugin + + + + + + + org.apache.hudi + hudi-common + ${project.version} + + + + + org.apache.avro + avro + + + + org.apache.parquet + parquet-avro + + + diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java b/hudi-examples/hudi-examples-common/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java similarity index 94% rename from hudi-examples/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java rename to hudi-examples/hudi-examples-common/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java index 78df2e78e7081..4ce11acfa0a92 100644 --- a/hudi-examples/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java +++ b/hudi-examples/hudi-examples-common/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java @@ -43,7 +43,6 @@ import java.util.stream.IntStream; import java.util.stream.Stream; - /** * Class to be used to generate test data. 
*/ @@ -63,7 +62,7 @@ public class HoodieExampleDataGenerator> { + "{\"name\":\"fare\",\"type\": \"double\"}]}"; public static Schema avroSchema = new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA); - private static Random rand = new Random(46474747); + private static final Random RAND = new Random(46474747); private final Map existingKeys; private final String[] partitionPaths; @@ -98,11 +97,11 @@ public GenericRecord generateGenericRecord(String rowKey, String riderName, Stri rec.put("ts", timestamp); rec.put("rider", riderName); rec.put("driver", driverName); - rec.put("begin_lat", rand.nextDouble()); - rec.put("begin_lon", rand.nextDouble()); - rec.put("end_lat", rand.nextDouble()); - rec.put("end_lon", rand.nextDouble()); - rec.put("fare", rand.nextDouble() * 100); + rec.put("begin_lat", RAND.nextDouble()); + rec.put("begin_lon", RAND.nextDouble()); + rec.put("end_lat", RAND.nextDouble()); + rec.put("end_lon", RAND.nextDouble()); + rec.put("fare", RAND.nextDouble() * 100); return rec; } @@ -120,7 +119,7 @@ public Stream> generateInsertsStream(String commitTime, Integer int currSize = getNumExistingKeys(); return IntStream.range(0, n).boxed().map(i -> { - String partitionPath = partitionPaths[rand.nextInt(partitionPaths.length)]; + String partitionPath = partitionPaths[RAND.nextInt(partitionPaths.length)]; HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), partitionPath); KeyPartition kp = new KeyPartition(); kp.key = key; @@ -142,7 +141,7 @@ public Stream> generateInsertsStream(String commitTime, Integer public List> generateUpdates(String commitTime, Integer n) { List> updates = new ArrayList<>(); for (int i = 0; i < n; i++) { - KeyPartition kp = existingKeys.get(rand.nextInt(numExistingKeys - 1)); + KeyPartition kp = existingKeys.get(RAND.nextInt(numExistingKeys - 1)); HoodieRecord record = generateUpdateRecord(kp.key, commitTime); updates.add(record); } diff --git a/hudi-examples/hudi-examples-dbt/.gitignore b/hudi-examples/hudi-examples-dbt/.gitignore 
new file mode 100644 index 0000000000000..0eb3fd035dbc7 --- /dev/null +++ b/hudi-examples/hudi-examples-dbt/.gitignore @@ -0,0 +1,8 @@ +target/ +dbt_modules/ +logs/ +.tox/ +.idea/ +.DS_Store +.vscode +*.log diff --git a/hudi-examples/hudi-examples-dbt/README.md b/hudi-examples/hudi-examples-dbt/README.md new file mode 100644 index 0000000000000..8fe796d37c521 --- /dev/null +++ b/hudi-examples/hudi-examples-dbt/README.md @@ -0,0 +1,134 @@ + +## Testing dbt project: `hudi_examples_dbt` + +This dbt project transforms demonstrates hudi integration with dbt, it has a few models to demonstrate the different ways in which you can create hudi datasets using dbt. + +### What is this repo? +What this repo _is_: +- A self-contained playground dbt project, useful for testing out scripts, and communicating some of the core dbt concepts. + +### Running this project +To get up and running with this project: +1. Install dbt using [these instructions](https://docs.getdbt.com/docs/installation). + +2. Install [dbt-spark](https://github.com/dbt-labs/dbt-spark) package: +```bash +pip install dbt-spark +``` + +3. Clone this repo and change into the `hudi-examples-dbt` directory from the command line: +```bash +cd hudi-examples/hudi-examples-dbt +``` + +4. Set up a profile called `spark` to connect to a spark cluster by following [these instructions](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile). If you have access to a data warehouse, you can use those credentials – we recommend setting your [target schema](https://docs.getdbt.com/docs/configure-your-profile#section-populating-your-profile) to be a new schema (dbt will create the schema for you, as long as you have the right privileges). If you don't have access to an existing data warehouse, you can also setup a local postgres database and connect to it in your profile. + +> **NOTE:** You need to include the hudi spark bundle to the spark cluster, the latest supported version is 0.10.1. + +5. 
Ensure your profile is setup correctly from the command line: +```bash +dbt debug +``` + +Output of the above command should show this text at the end of the output: +```bash +All checks passed! +``` + +6. Run the models: +```bash +dbt run +``` + +Output should look like this: +```bash +05:47:28 Running with dbt=1.0.0 +05:47:28 Found 5 models, 10 tests, 0 snapshots, 0 analyses, 0 macros, 0 operations, 0 seed files, 0 sources, 0 exposures, 0 metrics +05:47:28 +05:47:29 Concurrency: 1 threads (target='local') +05:47:29 +05:47:29 1 of 5 START incremental model analytics.hudi_insert_table...................... [RUN] +05:47:31 1 of 5 OK created incremental model analytics.hudi_insert_table................. [OK in 2.61s] +05:47:31 2 of 5 START incremental model analytics.hudi_insert_overwrite_table............ [RUN] +05:47:34 2 of 5 OK created incremental model analytics.hudi_insert_overwrite_table....... [OK in 3.19s] +05:47:34 3 of 5 START incremental model analytics.hudi_upsert_table...................... [RUN] +05:47:37 3 of 5 OK created incremental model analytics.hudi_upsert_table................. [OK in 2.68s] +05:47:37 4 of 5 START incremental model analytics.hudi_upsert_partitioned_cow_table...... [RUN] +05:47:40 4 of 5 OK created incremental model analytics.hudi_upsert_partitioned_cow_table. [OK in 2.60s] +05:47:40 5 of 5 START incremental model analytics.hudi_upsert_partitioned_mor_table...... [RUN] +05:47:42 5 of 5 OK created incremental model analytics.hudi_upsert_partitioned_mor_table. [OK in 2.53s] +05:47:42 +05:47:42 Finished running 5 incremental models in 14.70s. +05:47:42 +05:47:42 Completed successfully +``` +7. 
Test the output of the models: +```bash +dbt test +``` +Output should look like this: +```bash +05:48:17 Running with dbt=1.0.0 +05:48:17 Found 5 models, 10 tests, 0 snapshots, 0 analyses, 0 macros, 0 operations, 0 seed files, 0 sources, 0 exposures, 0 metrics +05:48:17 +05:48:19 Concurrency: 1 threads (target='local') +05:48:19 +05:48:19 1 of 10 START test not_null_hudi_insert_overwrite_table_id...................... [RUN] +05:48:19 1 of 10 PASS not_null_hudi_insert_overwrite_table_id............................ [PASS in 0.50s] +05:48:19 2 of 10 START test not_null_hudi_insert_overwrite_table_name.................... [RUN] +05:48:20 2 of 10 PASS not_null_hudi_insert_overwrite_table_name.......................... [PASS in 0.45s] +05:48:20 3 of 10 START test not_null_hudi_insert_overwrite_table_ts...................... [RUN] +05:48:20 3 of 10 PASS not_null_hudi_insert_overwrite_table_ts............................ [PASS in 0.47s] +05:48:20 4 of 10 START test not_null_hudi_insert_table_id................................ [RUN] +05:48:20 4 of 10 PASS not_null_hudi_insert_table_id...................................... [PASS in 0.44s] +05:48:20 5 of 10 START test not_null_hudi_upsert_table_id................................ [RUN] +05:48:21 5 of 10 PASS not_null_hudi_upsert_table_id...................................... [PASS in 0.38s] +05:48:21 6 of 10 START test not_null_hudi_upsert_table_name.............................. [RUN] +05:48:21 6 of 10 PASS not_null_hudi_upsert_table_name.................................... [PASS in 0.40s] +05:48:21 7 of 10 START test not_null_hudi_upsert_table_ts................................ [RUN] +05:48:22 7 of 10 PASS not_null_hudi_upsert_table_ts...................................... [PASS in 0.38s] +05:48:22 8 of 10 START test unique_hudi_insert_overwrite_table_id........................ [RUN] +05:48:23 8 of 10 PASS unique_hudi_insert_overwrite_table_id.............................. 
[PASS in 1.32s] +05:48:23 9 of 10 START test unique_hudi_insert_table_id.................................. [RUN] +05:48:24 9 of 10 PASS unique_hudi_insert_table_id........................................ [PASS in 1.26s] +05:48:24 10 of 10 START test unique_hudi_upsert_table_id................................. [RUN] +05:48:25 10 of 10 PASS unique_hudi_upsert_table_id....................................... [PASS in 1.29s] +05:48:26 +05:48:26 Finished running 10 tests in 8.23s. +05:48:26 +05:48:26 Completed successfully +05:48:26 +05:48:26 Done. PASS=10 WARN=0 ERROR=0 SKIP=0 TOTAL=10 +``` + +8. Generate documentation for the project: +```bash +dbt docs generate +``` + +9. View the [documentation](http://127.0.0.1:8080/#!/overview) for the project after running the following command: +```bash +dbt docs serve +``` + +--- +For more information on dbt: +- Read the [introduction to dbt](https://docs.getdbt.com/docs/introduction). +- Read the [dbt viewpoint](https://docs.getdbt.com/docs/about/viewpoint). +- Join the [dbt community](http://community.getdbt.com/). +--- diff --git a/hudi-examples/hudi-examples-dbt/dbt_project.yml b/hudi-examples/hudi-examples-dbt/dbt_project.yml new file mode 100644 index 0000000000000..dc5f5593d64d3 --- /dev/null +++ b/hudi-examples/hudi-examples-dbt/dbt_project.yml @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'hudi_examples_dbt' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'spark' + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_modules" + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ directory +# as tables. These settings can be overridden in the individual model files +# using the `{{ config(...) }}` macro. 
+models: + +file_format: hudi + hudi_examples_dbt: + # Applies to all files under models/example/ + example: + materialized: table diff --git a/hudi-examples/hudi-examples-dbt/models/example/hudi_insert_overwrite_table.sql b/hudi-examples/hudi-examples-dbt/models/example/hudi_insert_overwrite_table.sql new file mode 100644 index 0000000000000..e0afa5a456cf6 --- /dev/null +++ b/hudi-examples/hudi-examples-dbt/models/example/hudi_insert_overwrite_table.sql @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/* + Example of an insert_overwrite for a non-partitioned table with incremental materialization. 
+ */ +{{ config( + materialized='incremental', + file_format='hudi', + incremental_strategy='insert_overwrite', + options={ + 'type': 'cow', + 'precombineKey': 'ts', + }, + unique_key='id' + ) +}} + +select id, cast(rand() as string) as name, current_timestamp() as ts +from {{ ref('hudi_insert_table') }} \ No newline at end of file diff --git a/hudi-examples/hudi-examples-dbt/models/example/hudi_insert_table.sql b/hudi-examples/hudi-examples-dbt/models/example/hudi_insert_table.sql new file mode 100644 index 0000000000000..a77bf796cad28 --- /dev/null +++ b/hudi-examples/hudi-examples-dbt/models/example/hudi_insert_table.sql @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/* + Example of an insert for a non-partitioned table with incremental materialization. 
+ */ +{{ + config( + materialized='incremental', + file_format='hudi', + unique_key='id' + ) +}} + +with source_data as ( + + select format_number(rand()*1000, 0) as id + union all + select null as id + + ) + +select * +from source_data +where id is not null \ No newline at end of file diff --git a/hudi-examples/hudi-examples-dbt/models/example/hudi_upsert_partitioned_cow_table.sql b/hudi-examples/hudi-examples-dbt/models/example/hudi_upsert_partitioned_cow_table.sql new file mode 100644 index 0000000000000..caedcbc5fd055 --- /dev/null +++ b/hudi-examples/hudi-examples-dbt/models/example/hudi_upsert_partitioned_cow_table.sql @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/* + Example of an upsert for a partitioned copy on write table with incremental materialization using merge strategy. 
+ */ +{{ config( + materialized='incremental', + file_format='hudi', + incremental_strategy='merge', + options={ + 'type': 'cow', + 'primaryKey': 'id', + 'precombineKey': 'ts', + }, + unique_key='id', + partition_by='datestr', + pre_hook=["set spark.sql.datetime.java8API.enabled=false;"], + ) +}} + +select id, name, current_timestamp() as ts, current_date as datestr +from {{ ref('hudi_upsert_table') }} \ No newline at end of file diff --git a/hudi-examples/hudi-examples-dbt/models/example/hudi_upsert_partitioned_mor_table.sql b/hudi-examples/hudi-examples-dbt/models/example/hudi_upsert_partitioned_mor_table.sql new file mode 100644 index 0000000000000..2beab7c4ae466 --- /dev/null +++ b/hudi-examples/hudi-examples-dbt/models/example/hudi_upsert_partitioned_mor_table.sql @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/* + Example of an upsert for a partitioned merge on read table with incremental materialization using merge strategy. 
+ */ +{{ config( + materialized='incremental', + file_format='hudi', + incremental_strategy='merge', + options={ + 'type': 'mor', + 'primaryKey': 'id', + 'precombineKey': 'ts', + }, + unique_key='id', + partition_by='datestr', + pre_hook=["set spark.sql.datetime.java8API.enabled=false;"], + ) +}} + +select id, name, current_timestamp() as ts, current_date as datestr +from {{ ref('hudi_upsert_table') }} \ No newline at end of file diff --git a/hudi-examples/hudi-examples-dbt/models/example/hudi_upsert_table.sql b/hudi-examples/hudi-examples-dbt/models/example/hudi_upsert_table.sql new file mode 100644 index 0000000000000..b8ee5b3ed444b --- /dev/null +++ b/hudi-examples/hudi-examples-dbt/models/example/hudi_upsert_table.sql @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/* + Example of an upsert for a non-partitioned table with incremental materialization using merge strategy. 
+ */ +{{ config( + materialized='incremental', + file_format='hudi', + incremental_strategy='merge', + options={ + 'type': 'cow', + 'primaryKey': 'id', + 'precombineKey': 'ts', + }, + unique_key='id' + ) +}} + +select id, name, current_timestamp() as ts +from {{ ref('hudi_insert_overwrite_table') }} \ No newline at end of file diff --git a/hudi-examples/hudi-examples-dbt/models/example/schema.yml b/hudi-examples/hudi-examples-dbt/models/example/schema.yml new file mode 100644 index 0000000000000..64ae9099bdd97 --- /dev/null +++ b/hudi-examples/hudi-examples-dbt/models/example/schema.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +version: 2 + +models: + - name: hudi_insert_table + description: "Hudi insert non-partitioned table with incremental materialization" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null + + - name: hudi_insert_overwrite_table + description: "Hudi insert overwrite non-partitioned table with incremental materialization" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null + - name: name + description: "Employee name" + tests: + - not_null + - name: ts + description: "Created timestamp" + tests: + - not_null + + - name: hudi_upsert_table + description: "Hudi upsert non-partitioned table with incremental materialization" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null + - name: name + description: "Employee name" + tests: + - not_null + - name: ts + description: "Created timestamp" + tests: + - not_null + + - name: hudi_upsert_paritioned_cow_table + description: "Hudi upsert partitioned copy-on-write table with incremental materialization using merge strategy" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null + - name: name + description: "Employee name" + tests: + - not_null + - name: ts + description: "Created timestamp" + tests: + - not_null + - name: datestr + description: "Partition date string column" + tests: + - not_null + + - name: hudi_upsert_paritioned_mor_table + description: "Hudi upsert partitioned merge-on-read table with incremental materialization using merge strategy" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null + - name: name + description: "Employee name" + tests: + - not_null + - name: ts + description: "Created timestamp" + tests: + - not_null + - name: datestr + description: "Partition date string column" + tests: + - not_null diff --git 
a/hudi-examples/hudi-examples-flink/pom.xml b/hudi-examples/hudi-examples-flink/pom.xml new file mode 100644 index 0000000000000..6cfd5a533d35f --- /dev/null +++ b/hudi-examples/hudi-examples-flink/pom.xml @@ -0,0 +1,364 @@ + + + + + hudi-examples + org.apache.hudi + 0.12.0-SNAPSHOT + + 4.0.0 + + hudi-examples-flink + + + ${project.parent.basedir} + true + 1.11.1 + + + + + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + + + org.apache.maven.plugins + maven-jar-plugin + 3.1.2 + + + + test-jar + + + + + + org.apache.rat + apache-rat-plugin + + + + + + src/main/resources + + + src/test/resources + + + + + + + + org.apache.hudi + hudi-common + ${project.version} + + + org.apache.hudi + hudi-client-common + ${project.version} + + + org.apache.hudi + hudi-flink-client + ${project.version} + + + org.apache.hudi + hudi-hadoop-mr + ${project.version} + + + org.apache.hudi + hudi-hive-sync + ${project.version} + + + org.apache.hudi + hudi-sync-common + ${project.version} + + + + org.apache.hudi + hudi-flink + ${project.version} + compile + + + + + org.apache.flink + flink-streaming-java_${scala.binary.version} + compile + + + org.apache.flink + flink-clients_${scala.binary.version} + compile + + + com.esotericsoftware.kryo + kryo + + + com.esotericsoftware.minlog + minlog + + + + + org.apache.flink + flink-connector-kafka_${scala.binary.version} + compile + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + org.apache.flink + flink-hadoop-compatibility_${scala.binary.version} + ${flink.version} + + + org.apache.flink + flink-parquet_${scala.binary.version} + ${flink.version} + provided + + + org.apache.flink + flink-json + ${flink.version} + provided + + + org.apache.flink + flink-table-common + ${flink.version} + provided + + + org.apache.flink + ${flink.table.runtime.artifactId} + ${flink.version} + provided + + + org.apache.flink + ${flink.table.planner.artifactId} + ${flink.version} + provided + + + 
org.apache.flink + flink-statebackend-rocksdb_${scala.binary.version} + ${flink.version} + provided + + + + org.apache.parquet + parquet-hadoop + ${parquet.version} + + + org.xerial.snappy + snappy-java + + + + + + + org.apache.avro + avro + + 1.10.0 + compile + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + compile + + + org.slf4j + slf4j-log4j12 + + + + + + com.beust + jcommander + compile + + + com.twitter + bijection-avro_${scala.binary.version} + 0.9.7 + + + joda-time + joda-time + 2.5 + + + + ${hive.groupid} + hive-exec + ${hive.version} + ${hive.exec.classifier} + + + javax.mail + mail + + + org.eclipse.jetty.aggregate + * + + + + + + + + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.vintage + junit-vintage-engine + test + + + org.junit.jupiter + junit-jupiter-params + test + + + + + org.apache.hudi + hudi-common + ${project.version} + tests + test-jar + test + + + org.apache.hudi + hudi-client-common + ${project.version} + tests + test-jar + test + + + org.apache.hudi + hudi-flink-client + ${project.version} + tests + test-jar + test + + + + + + + + + + + org.apache.flink + flink-test-utils_${scala.binary.version} + ${flink.version} + test + + + org.apache.flink + ${flink.runtime.artifactId} + ${flink.version} + test + test-jar + + + org.apache.flink + flink-streaming-java_${scala.binary.version} + ${flink.version} + test + test-jar + + + org.apache.flink + ${flink.table.runtime.artifactId} + ${flink.version} + test + test-jar + + + org.apache.flink + flink-json + ${flink.version} + test + test-jar + + + org.apache.flink + flink-csv + ${flink.version} + test + + + + + org.apache.parquet + parquet-avro + ${parquet.version} + test + + + diff --git a/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/HoodieFlinkQuickstart.java b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/HoodieFlinkQuickstart.java new file 
mode 100644 index 0000000000000..b3e105015a58c --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/HoodieFlinkQuickstart.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.examples.quickstart; + +import static org.apache.hudi.examples.quickstart.utils.QuickstartConfigurations.sql; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.core.execution.JobClient; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.EnvironmentSettings; +import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.TableResult; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.apache.flink.table.api.config.ExecutionConfigOptions; +import org.apache.flink.table.api.internal.TableEnvironmentImpl; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.exceptions.TableNotExistException; +import org.apache.flink.types.Row; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.configuration.FlinkOptions; +import org.apache.hudi.examples.quickstart.factory.CollectSinkTableFactory; +import org.apache.hudi.examples.quickstart.utils.QuickstartConfigurations; +import org.jetbrains.annotations.NotNull; + +public final class HoodieFlinkQuickstart { + private EnvironmentSettings settings = null; + private TableEnvironment streamTableEnv = null; + + private String tableName; + + private HoodieFlinkQuickstart() { + } + + public static HoodieFlinkQuickstart instance() { + return new HoodieFlinkQuickstart(); + } + + public static void main(String[] args) throws TableNotExistException, InterruptedException { + if (args.length < 3) { + System.err.println("Usage: HoodieWriteClientExample "); + System.exit(1); + } + String tablePath = args[0]; + String tableName = args[1]; + String tableType = args[2]; + + 
HoodieFlinkQuickstart flinkQuickstart = instance(); + flinkQuickstart.initEnv(); + + // create filesystem table named source + flinkQuickstart.createFileSource(); + + // create hudi table + flinkQuickstart.createHudiTable(tablePath, tableName, HoodieTableType.valueOf(tableType)); + + // insert data + flinkQuickstart.insertData(); + + // query data + flinkQuickstart.queryData(); + + // update data + flinkQuickstart.updateData(); + } + + public void initEnv() { + if (this.streamTableEnv == null) { + settings = EnvironmentSettings.newInstance().build(); + TableEnvironment streamTableEnv = TableEnvironmentImpl.create(settings); + streamTableEnv.getConfig().getConfiguration() + .setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1); + Configuration execConf = streamTableEnv.getConfig().getConfiguration(); + execConf.setString("execution.checkpointing.interval", "2s"); + // configure not to retry after failure + execConf.setString("restart-strategy", "fixed-delay"); + execConf.setString("restart-strategy.fixed-delay.attempts", "0"); + this.streamTableEnv = streamTableEnv; + } + } + + public TableEnvironment getStreamTableEnv() { + return streamTableEnv; + } + + public TableEnvironment getBatchTableEnv() { + Configuration conf = new Configuration(); + // for batch upsert use cases: current suggestion is to disable these 2 options, + // from 1.14, flink runtime execution mode has switched from streaming + // to batch for batch execution mode(before that, both streaming and batch use streaming execution mode), + // current batch execution mode has these limitations: + // + // 1. the keyed stream default to always sort the inputs by key; + // 2. 
the batch state-backend requires the inputs sort by state key + // + // For our hudi batch pipeline upsert case, we rely on the consuming sequence for index records and data records, + // the index records must be loaded first before data records for BucketAssignFunction to keep upsert semantics correct, + // so we suggest disabling these 2 options to use streaming state-backend for batch execution mode + // to keep the strategy before 1.14. + conf.setBoolean("execution.sorted-inputs.enabled", false); + conf.setBoolean("execution.batch-state-backend.enabled", false); + StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf); + settings = EnvironmentSettings.newInstance().inBatchMode().build(); + TableEnvironment batchTableEnv = StreamTableEnvironment.create(execEnv, settings); + batchTableEnv.getConfig().getConfiguration() + .setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1); + return batchTableEnv; + } + + public void createHudiTable(String tablePath, String tableName, + HoodieTableType tableType) { + this.tableName = tableName; + + // create hudi table + String hoodieTableDDL = sql(tableName) + .option(FlinkOptions.PATH, tablePath) + .option(FlinkOptions.READ_AS_STREAMING, true) + .option(FlinkOptions.TABLE_TYPE, tableType) + .end(); + streamTableEnv.executeSql(hoodieTableDDL); + } + + public void createFileSource() { + // create filesystem table named source + String createSource = QuickstartConfigurations.getFileSourceDDL("source"); + streamTableEnv.executeSql(createSource); + } + + @NotNull List insertData() throws InterruptedException, TableNotExistException { + // insert data + String insertInto = String.format("insert into %s select * from source", tableName); + execInsertSql(streamTableEnv, insertInto); + return queryData(); + } + + List queryData() throws InterruptedException, TableNotExistException { + // query data + // reading from the latest commit instance. 
+ return execSelectSql(streamTableEnv, String.format("select * from %s", tableName), 10); + } + + @NotNull List updateData() throws InterruptedException, TableNotExistException { + // update data + String insertInto = String.format("insert into %s select * from source", tableName); + execInsertSql(getStreamTableEnv(), insertInto); + return queryData(); + } + + public static void execInsertSql(TableEnvironment tEnv, String insert) { + TableResult tableResult = tEnv.executeSql(insert); + // wait to finish + try { + tableResult.getJobClient().get().getJobExecutionResult().get(); + } catch (InterruptedException | ExecutionException ex) { + // ignored + } + } + + public static List execSelectSql(TableEnvironment tEnv, String select, long timeout) + throws InterruptedException, TableNotExistException { + return execSelectSql(tEnv, select, timeout, null); + } + + public static List execSelectSql(TableEnvironment tEnv, String select, long timeout, String sourceTable) + throws InterruptedException, TableNotExistException { + final String sinkDDL; + if (sourceTable != null) { + // use the source table schema as the sink schema if the source table was specified, . 
+ ObjectPath objectPath = new ObjectPath(tEnv.getCurrentDatabase(), sourceTable); + TableSchema schema = tEnv.getCatalog(tEnv.getCurrentCatalog()).get().getTable(objectPath).getSchema(); + sinkDDL = QuickstartConfigurations.getCollectSinkDDL("sink", schema); + } else { + sinkDDL = QuickstartConfigurations.getCollectSinkDDL("sink"); + } + return execSelectSql(tEnv, select, sinkDDL, timeout); + } + + public static List execSelectSql(TableEnvironment tEnv, String select, String sinkDDL, long timeout) + throws InterruptedException { + tEnv.executeSql("DROP TABLE IF EXISTS sink"); + tEnv.executeSql(sinkDDL); + TableResult tableResult = tEnv.executeSql("insert into sink " + select); + // wait for the timeout then cancels the job + TimeUnit.SECONDS.sleep(timeout); + tableResult.getJobClient().ifPresent(JobClient::cancel); + tEnv.executeSql("DROP TABLE IF EXISTS sink"); + return CollectSinkTableFactory.RESULT.values().stream() + .flatMap(Collection::stream) + .collect(Collectors.toList()); + } +} diff --git a/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/factory/CollectSinkTableFactory.java b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/factory/CollectSinkTableFactory.java new file mode 100644 index 0000000000000..5687a7c146720 --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/factory/CollectSinkTableFactory.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.examples.quickstart.factory; + +import org.apache.flink.api.common.state.ListState; +import org.apache.flink.api.common.state.ListStateDescriptor; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.runtime.state.FunctionInitializationContext; +import org.apache.flink.runtime.state.FunctionSnapshotContext; +import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; +import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.sink.SinkFunctionProvider; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.DynamicTableSinkFactory; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; +import org.apache.flink.types.RowKind; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Factory for CollectTableSink. + * + *

    Note: The CollectTableSink collects all the data of a table into a global collection {@code RESULT}, + * so the tests should executed in single thread and the table name should be the same. + */ +public class CollectSinkTableFactory implements DynamicTableSinkFactory { + public static final String FACTORY_ID = "collect"; + + // global results to collect and query + public static final Map> RESULT = new HashMap<>(); + + @Override + public DynamicTableSink createDynamicTableSink(Context context) { + FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context); + helper.validate(); + + TableSchema schema = context.getCatalogTable().getSchema(); + RESULT.clear(); + return new CollectTableSink(schema, context.getObjectIdentifier().getObjectName()); + } + + @Override + public String factoryIdentifier() { + return FACTORY_ID; + } + + @Override + public Set> requiredOptions() { + return Collections.emptySet(); + } + + @Override + public Set> optionalOptions() { + return Collections.emptySet(); + } + + // -------------------------------------------------------------------------------------------- + // Table sinks + // -------------------------------------------------------------------------------------------- + + /** + * Values {@link DynamicTableSink} for testing. 
+ */ + private static class CollectTableSink implements DynamicTableSink { + + private final TableSchema schema; + private final String tableName; + + private CollectTableSink( + TableSchema schema, + String tableName) { + this.schema = schema; + this.tableName = tableName; + } + + @Override + public ChangelogMode getChangelogMode(ChangelogMode requestedMode) { + return ChangelogMode.newBuilder() + .addContainedKind(RowKind.INSERT) + .addContainedKind(RowKind.DELETE) + .addContainedKind(RowKind.UPDATE_AFTER) + .build(); + } + + @Override + public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { + final DataType rowType = schema.toPhysicalRowDataType(); + final RowTypeInfo rowTypeInfo = (RowTypeInfo) TypeConversions.fromDataTypeToLegacyInfo(rowType); + DataStructureConverter converter = context.createDataStructureConverter(schema.toPhysicalRowDataType()); + return SinkFunctionProvider.of(new CollectSinkFunction(converter, rowTypeInfo)); + } + + @Override + public DynamicTableSink copy() { + return new CollectTableSink(schema, tableName); + } + + @Override + public String asSummaryString() { + return "CollectSink"; + } + } + + static class CollectSinkFunction extends RichSinkFunction implements CheckpointedFunction { + + private static final long serialVersionUID = 1L; + private final DynamicTableSink.DataStructureConverter converter; + private final RowTypeInfo rowTypeInfo; + + protected transient ListState resultState; + protected transient List localResult; + + private int taskID; + + protected CollectSinkFunction(DynamicTableSink.DataStructureConverter converter, RowTypeInfo rowTypeInfo) { + this.converter = converter; + this.rowTypeInfo = rowTypeInfo; + } + + @Override + public void invoke(RowData value, Context context) { + Row row = (Row) converter.toExternal(value); + assert row != null; + row.setKind(value.getRowKind()); + RESULT.get(taskID).add(row); + } + + @Override + public void initializeState(FunctionInitializationContext context) throws 
Exception { + this.resultState = context.getOperatorStateStore().getListState( + new ListStateDescriptor<>("sink-results", rowTypeInfo)); + this.localResult = new ArrayList<>(); + if (context.isRestored()) { + for (Row value : resultState.get()) { + localResult.add(value); + } + } + this.taskID = getRuntimeContext().getIndexOfThisSubtask(); + synchronized (CollectSinkTableFactory.class) { + RESULT.put(taskID, localResult); + } + } + + @Override + public void snapshotState(FunctionSnapshotContext context) throws Exception { + resultState.clear(); + resultState.addAll(RESULT.get(taskID)); + } + } +} diff --git a/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/factory/ContinuousFileSourceFactory.java b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/factory/ContinuousFileSourceFactory.java new file mode 100644 index 0000000000000..834fa9f252fd5 --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/factory/ContinuousFileSourceFactory.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.examples.quickstart.factory; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.factories.DynamicTableSourceFactory; +import org.apache.flink.table.factories.FactoryUtil; + +import java.util.Collections; +import java.util.Set; +import org.apache.hudi.configuration.FlinkOptions; +import org.apache.hudi.examples.quickstart.source.ContinuousFileSource; + +/** + * Factory for ContinuousFileSource. + */ +public class ContinuousFileSourceFactory implements DynamicTableSourceFactory { + public static final String FACTORY_ID = "continuous-file-source"; + + public static final ConfigOption CHECKPOINTS = ConfigOptions + .key("checkpoints") + .intType() + .defaultValue(2) + .withDescription("Number of checkpoints to write the data set as, default 2"); + + @Override + public DynamicTableSource createDynamicTableSource(Context context) { + FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context); + helper.validate(); + + Configuration conf = (Configuration) helper.getOptions(); + Path path = new Path(conf.getOptional(FlinkOptions.PATH).orElseThrow(() -> + new ValidationException("Option [path] should be not empty."))); + return new ContinuousFileSource(context.getCatalogTable().getResolvedSchema(), path, conf); + } + + @Override + public String factoryIdentifier() { + return FACTORY_ID; + } + + @Override + public Set> requiredOptions() { + return Collections.singleton(FlinkOptions.PATH); + } + + @Override + public Set> optionalOptions() { + return Collections.singleton(CHECKPOINTS); + } +} diff --git 
a/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/source/ContinuousFileSource.java b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/source/ContinuousFileSource.java new file mode 100644 index 0000000000000..b457a7e6080ab --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/source/ContinuousFileSource.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.examples.quickstart.source; + +import org.apache.flink.api.common.state.CheckpointListener; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.core.fs.Path; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.formats.json.JsonRowDataDeserializationSchema; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.source.DataStreamScanProvider; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.connector.source.ScanTableSource; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.logical.RowType; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.apache.hudi.examples.quickstart.factory.ContinuousFileSourceFactory.CHECKPOINTS; + +/** + * A continuous file source that can trigger checkpoints continuously. + * + *

    It loads the data in the specified file and split the data into number of checkpoints batches. + * Say, if you want 4 checkpoints and there are 8 records in the file, the emit strategy is: + * + *

    + *   | 2 records | 2 records | 2 records | 2 records |
    + *   | cp1       | cp2       |cp3        | cp4       |
    + * 
    + * + *

    If all the data are flushed out, it waits for the next checkpoint to finish and tear down the source. + */ +public class ContinuousFileSource implements ScanTableSource { + + private final ResolvedSchema tableSchema; + private final Path path; + private final Configuration conf; + + public ContinuousFileSource( + ResolvedSchema tableSchema, + Path path, + Configuration conf) { + this.tableSchema = tableSchema; + this.path = path; + this.conf = conf; + } + + @Override + public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) { + return new DataStreamScanProvider() { + + @Override + public boolean isBounded() { + return false; + } + + @Override + public DataStream produceDataStream(StreamExecutionEnvironment execEnv) { + final RowType rowType = (RowType) tableSchema.toSourceRowDataType().getLogicalType(); + JsonRowDataDeserializationSchema deserializationSchema = new JsonRowDataDeserializationSchema( + rowType, + InternalTypeInfo.of(rowType), + false, + true, + TimestampFormat.ISO_8601); + + return execEnv.addSource(new BoundedSourceFunction(path, conf.getInteger(CHECKPOINTS))) + .name("continuous_file_source") + .setParallelism(1) + .map(record -> deserializationSchema.deserialize(record.getBytes(StandardCharsets.UTF_8)), + InternalTypeInfo.of(rowType)); + } + }; + } + + @Override + public ChangelogMode getChangelogMode() { + return ChangelogMode.insertOnly(); + } + + @Override + public DynamicTableSource copy() { + return new ContinuousFileSource(this.tableSchema, this.path, this.conf); + } + + @Override + public String asSummaryString() { + return "ContinuousFileSource"; + } + + /** + * Source function that partition the data into given number checkpoints batches. 
+ */ + public static class BoundedSourceFunction implements SourceFunction, CheckpointListener { + private final Path path; + private List dataBuffer; + + private final int checkpoints; + private final AtomicInteger currentCP = new AtomicInteger(0); + + private volatile boolean isRunning = true; + + public BoundedSourceFunction(Path path, int checkpoints) { + this.path = path; + this.checkpoints = checkpoints; + } + + @Override + public void run(SourceContext context) throws Exception { + if (this.dataBuffer == null) { + loadDataBuffer(); + } + int oldCP = this.currentCP.get(); + boolean finish = false; + while (isRunning) { + int batchSize = this.dataBuffer.size() / this.checkpoints; + int start = batchSize * oldCP; + synchronized (context.getCheckpointLock()) { + for (int i = start; i < start + batchSize; i++) { + if (i >= this.dataBuffer.size()) { + finish = true; + break; + // wait for the next checkpoint and exit + } + context.collect(this.dataBuffer.get(i)); + } + } + oldCP++; + while (this.currentCP.get() < oldCP) { + synchronized (context.getCheckpointLock()) { + context.getCheckpointLock().wait(10); + } + } + if (finish || !isRunning) { + return; + } + } + } + + @Override + public void cancel() { + this.isRunning = false; + } + + private void loadDataBuffer() { + try { + this.dataBuffer = Files.readAllLines(Paths.get(this.path.toUri())); + } catch (IOException e) { + throw new RuntimeException("Read file " + this.path + " error", e); + } + } + + @Override + public void notifyCheckpointComplete(long l) { + this.currentCP.incrementAndGet(); + } + } +} diff --git a/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/utils/QuickstartConfigurations.java b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/utils/QuickstartConfigurations.java new file mode 100644 index 0000000000000..8dfd9df9eb479 --- /dev/null +++ 
b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/utils/QuickstartConfigurations.java @@ -0,0 +1,317 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.examples.quickstart.utils; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.runtime.typeutils.RowDataSerializer; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hudi.configuration.FlinkOptions; +import org.apache.hudi.examples.quickstart.factory.CollectSinkTableFactory; +import org.apache.hudi.examples.quickstart.factory.ContinuousFileSourceFactory; +import org.apache.hudi.streamer.FlinkStreamerConfig; + +/** + * Configurations for the test. 
+ */ +public class QuickstartConfigurations { + private QuickstartConfigurations() { + } + + public static final DataType ROW_DATA_TYPE = DataTypes.ROW( + DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key + DataTypes.FIELD("name", DataTypes.VARCHAR(10)), + DataTypes.FIELD("age", DataTypes.INT()), + DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field + DataTypes.FIELD("partition", DataTypes.VARCHAR(10))) + .notNull(); + + public static final RowType ROW_TYPE = (RowType) ROW_DATA_TYPE.getLogicalType(); + + public static final ResolvedSchema TABLE_SCHEMA = SchemaBuilder.instance() + .fields(ROW_TYPE.getFieldNames(), ROW_DATA_TYPE.getChildren()) + .build(); + + private static final List FIELDS = ROW_TYPE.getFields().stream() + .map(RowType.RowField::asSummaryString).collect(Collectors.toList()); + + public static final DataType ROW_DATA_TYPE_WIDER = DataTypes.ROW( + DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key + DataTypes.FIELD("name", DataTypes.VARCHAR(10)), + DataTypes.FIELD("age", DataTypes.INT()), + DataTypes.FIELD("salary", DataTypes.DOUBLE()), + DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field + DataTypes.FIELD("partition", DataTypes.VARCHAR(10))) + .notNull(); + + public static final RowType ROW_TYPE_WIDER = (RowType) ROW_DATA_TYPE_WIDER.getLogicalType(); + + public static String getCreateHoodieTableDDL(String tableName, Map options) { + return getCreateHoodieTableDDL(tableName, options, true, "partition"); + } + + public static String getCreateHoodieTableDDL( + String tableName, + Map options, + boolean havePartition, + String partitionField) { + return getCreateHoodieTableDDL(tableName, FIELDS, options, havePartition, "uuid", partitionField); + } + + public static String getCreateHoodieTableDDL( + String tableName, + List fields, + Map options, + boolean havePartition, + String pkField, + String partitionField) { + StringBuilder builder = new StringBuilder(); + builder.append("create table 
").append(tableName).append("(\n"); + for (String field : fields) { + builder.append(" ").append(field).append(",\n"); + } + builder.append(" PRIMARY KEY(").append(pkField).append(") NOT ENFORCED\n") + .append(")\n"); + if (havePartition) { + builder.append("PARTITIONED BY (`").append(partitionField).append("`)\n"); + } + final String connector = options.computeIfAbsent("connector", k -> "hudi"); + builder.append("with (\n" + + " 'connector' = '").append(connector).append("'"); + options.forEach((k, v) -> builder.append(",\n") + .append(" '").append(k).append("' = '").append(v).append("'")); + builder.append("\n)"); + return builder.toString(); + } + + public static String getCreateHudiCatalogDDL(final String catalogName, final String catalogPath) { + StringBuilder builder = new StringBuilder(); + builder.append("create catalog ").append(catalogName).append(" with (\n"); + builder.append(" 'type' = 'hudi',\n" + + " 'catalog.path' = '").append(catalogPath).append("'"); + builder.append("\n)"); + return builder.toString(); + } + + public static String getFileSourceDDL(String tableName) { + return getFileSourceDDL(tableName, "source-file.json"); + } + + public static String getFileSourceDDL(String tableName, int checkpoints) { + return getFileSourceDDL(tableName, "source-file.json", checkpoints); + } + + public static String getFileSourceDDL(String tableName, String fileName) { + return getFileSourceDDL(tableName, fileName, 2); + } + + public static String getFileSourceDDL(String tableName, String fileName, int checkpoints) { + String sourcePath = Objects.requireNonNull(Thread.currentThread() + .getContextClassLoader().getResource(fileName)).toString(); + return "create table " + tableName + "(\n" + + " uuid varchar(20),\n" + + " name varchar(10),\n" + + " age int,\n" + + " ts timestamp(3),\n" + + " `partition` varchar(20)\n" + + ") with (\n" + + " 'connector' = '" + ContinuousFileSourceFactory.FACTORY_ID + "',\n" + + " 'path' = '" + sourcePath + "',\n" + + " 
'checkpoints' = '" + checkpoints + "'\n" + + ")"; + } + + public static String getCollectSinkDDL(String tableName) { + return "create table " + tableName + "(\n" + + " uuid varchar(20),\n" + + " name varchar(10),\n" + + " age int,\n" + + " ts timestamp(3),\n" + + " `partition` varchar(20)\n" + + ") with (\n" + + " 'connector' = '" + CollectSinkTableFactory.FACTORY_ID + "'" + + ")"; + } + + public static String getCollectSinkDDL(String tableName, TableSchema tableSchema) { + final StringBuilder builder = new StringBuilder("create table " + tableName + "(\n"); + String[] fieldNames = tableSchema.getFieldNames(); + DataType[] fieldTypes = tableSchema.getFieldDataTypes(); + for (int i = 0; i < fieldNames.length; i++) { + builder.append(" `") + .append(fieldNames[i]) + .append("` ") + .append(fieldTypes[i].toString()); + if (i != fieldNames.length - 1) { + builder.append(","); + } + builder.append("\n"); + } + final String withProps = "" + + ") with (\n" + + " 'connector' = '" + CollectSinkTableFactory.FACTORY_ID + "'\n" + + ")"; + builder.append(withProps); + return builder.toString(); + } + + public static String getCsvSourceDDL(String tableName, String fileName) { + String sourcePath = Objects.requireNonNull(Thread.currentThread() + .getContextClassLoader().getResource(fileName)).toString(); + return "create table " + tableName + "(\n" + + " uuid varchar(20),\n" + + " name varchar(10),\n" + + " age int,\n" + + " ts timestamp(3),\n" + + " `partition` varchar(20)\n" + + ") with (\n" + + " 'connector' = 'filesystem',\n" + + " 'path' = '" + sourcePath + "',\n" + + " 'format' = 'csv'\n" + + ")"; + } + + public static final RowDataSerializer SERIALIZER = new RowDataSerializer(ROW_TYPE); + + public static Configuration getDefaultConf(String tablePath) { + Configuration conf = new Configuration(); + conf.setString(FlinkOptions.PATH, tablePath); + conf.setString(FlinkOptions.SOURCE_AVRO_SCHEMA_PATH, + Objects.requireNonNull(Thread.currentThread() + 
.getContextClassLoader().getResource("test_read_schema.avsc")).toString()); + conf.setString(FlinkOptions.TABLE_NAME, "TestHoodieTable"); + conf.setString(FlinkOptions.PARTITION_PATH_FIELD, "partition"); + return conf; + } + + public static FlinkStreamerConfig getDefaultStreamerConf(String tablePath) { + FlinkStreamerConfig streamerConf = new FlinkStreamerConfig(); + streamerConf.targetBasePath = tablePath; + streamerConf.sourceAvroSchemaPath = Objects.requireNonNull(Thread.currentThread() + .getContextClassLoader().getResource("test_read_schema.avsc")).toString(); + streamerConf.targetTableName = "TestHoodieTable"; + streamerConf.partitionPathField = "partition"; + streamerConf.tableType = "COPY_ON_WRITE"; + streamerConf.checkpointInterval = 4000L; + return streamerConf; + } + + /** + * Creates the tool to build hoodie table DDL. + */ + public static Sql sql(String tableName) { + return new Sql(tableName); + } + + public static Catalog catalog(String catalogName) { + return new Catalog(catalogName); + } + + // ------------------------------------------------------------------------- + // Utilities + // ------------------------------------------------------------------------- + + /** + * Tool to build hoodie table DDL with schema {@link #TABLE_SCHEMA}. 
+ */ + public static class Sql { + private final Map options; + private final String tableName; + private List fields = new ArrayList<>(); + private boolean withPartition = true; + private String pkField = "uuid"; + private String partitionField = "partition"; + + public Sql(String tableName) { + options = new HashMap<>(); + this.tableName = tableName; + } + + public Sql option(ConfigOption option, Object val) { + this.options.put(option.key(), val.toString()); + return this; + } + + public Sql option(String key, Object val) { + this.options.put(key, val.toString()); + return this; + } + + public Sql options(Map options) { + this.options.putAll(options); + return this; + } + + public Sql noPartition() { + this.withPartition = false; + return this; + } + + public Sql pkField(String pkField) { + this.pkField = pkField; + return this; + } + + public Sql partitionField(String partitionField) { + this.partitionField = partitionField; + return this; + } + + public Sql field(String fieldSchema) { + fields.add(fieldSchema); + return this; + } + + public String end() { + if (this.fields.size() == 0) { + this.fields = FIELDS; + } + return QuickstartConfigurations.getCreateHoodieTableDDL(this.tableName, this.fields, options, + this.withPartition, this.pkField, this.partitionField); + } + } + + public static class Catalog { + private final String catalogName; + private String catalogPath = "."; + + public Catalog(String catalogName) { + this.catalogName = catalogName; + } + + public Catalog catalogPath(String catalogPath) { + this.catalogPath = catalogPath; + return this; + } + + public String end() { + return QuickstartConfigurations.getCreateHudiCatalogDDL(catalogName, catalogPath); + } + } +} diff --git a/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/utils/SchemaBuilder.java b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/utils/SchemaBuilder.java new file mode 100644 index 
0000000000000..76306f780646d --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/utils/SchemaBuilder.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.examples.quickstart.utils; + +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.catalog.UniqueConstraint; +import org.apache.flink.table.catalog.WatermarkSpec; +import org.apache.flink.table.types.DataType; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** + * Builder for {@link ResolvedSchema}. 
+ */ +public class SchemaBuilder { + private List columns; + private List watermarkSpecs; + private UniqueConstraint constraint; + + public static SchemaBuilder instance() { + return new SchemaBuilder(); + } + + private SchemaBuilder() { + this.columns = new ArrayList<>(); + this.watermarkSpecs = new ArrayList<>(); + } + + public SchemaBuilder field(String name, DataType type) { + this.columns.add(Column.physical(name, type)); + return this; + } + + public SchemaBuilder fields(List names, List types) { + List columns = IntStream.range(0, names.size()) + .mapToObj(idx -> Column.physical(names.get(idx), types.get(idx))) + .collect(Collectors.toList()); + this.columns.addAll(columns); + return this; + } + + public SchemaBuilder primaryKey(String... columns) { + this.constraint = UniqueConstraint.primaryKey("pk", Arrays.asList(columns)); + return this; + } + + public ResolvedSchema build() { + return new ResolvedSchema(columns, watermarkSpecs, constraint); + } +} diff --git a/hudi-examples/hudi-examples-flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/hudi-examples/hudi-examples-flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory new file mode 100644 index 0000000000000..27a137292b388 --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.hudi.examples.quickstart.factory.ContinuousFileSourceFactory +org.apache.hudi.examples.quickstart.factory.CollectSinkTableFactory diff --git a/hudi-examples/hudi-examples-flink/src/main/resources/source-file.json b/hudi-examples/hudi-examples-flink/src/main/resources/source-file.json new file mode 100644 index 0000000000000..2f628e29c535b --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/main/resources/source-file.json @@ -0,0 +1,8 @@ +{"uuid": "id1", "name": "Danny", "age": 23, "ts": "1970-01-01T00:00:01", "partition": "par1"} +{"uuid": "id2", "name": "Stephen", "age": 33, "ts": "1970-01-01T00:00:02", "partition": "par1"} +{"uuid": "id3", "name": "Julian", "age": 53, "ts": "1970-01-01T00:00:03", "partition": "par2"} +{"uuid": "id4", "name": "Fabian", "age": 31, "ts": "1970-01-01T00:00:04", "partition": "par2"} +{"uuid": "id5", "name": "Sophia", "age": 18, "ts": "1970-01-01T00:00:05", "partition": "par3"} +{"uuid": "id6", "name": "Emma", "age": 20, "ts": "1970-01-01T00:00:06", "partition": "par3"} +{"uuid": "id7", "name": "Bob", "age": 44, "ts": "1970-01-01T00:00:07", "partition": "par4"} +{"uuid": "id8", "name": "Han", "age": 56, "ts": "1970-01-01T00:00:08", "partition": "par4"} diff --git a/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieFlinkQuickstart.java b/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieFlinkQuickstart.java new file mode 100644 index 0000000000000..4a2768119bf8e --- /dev/null +++ 
b/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieFlinkQuickstart.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.examples.quickstart; + +import org.apache.flink.test.util.AbstractTestBase; +import org.apache.flink.types.Row; +import org.apache.hudi.common.model.HoodieTableType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +import java.io.File; +import java.util.List; + +import static org.apache.hudi.examples.quickstart.TestQuickstartData.assertRowsEquals; + +/** + * IT cases for Hoodie table source and sink. 
+ */ +public class TestHoodieFlinkQuickstart extends AbstractTestBase { + private final HoodieFlinkQuickstart flinkQuickstart = HoodieFlinkQuickstart.instance(); + + @BeforeEach + void beforeEach() { + flinkQuickstart.initEnv(); + } + + @TempDir + File tempFile; + + @ParameterizedTest + @EnumSource(value = HoodieTableType.class) + void testHoodieFlinkQuickstart(HoodieTableType tableType) throws Exception { + // create filesystem table named source + flinkQuickstart.createFileSource(); + + // create hudi table + flinkQuickstart.createHudiTable(tempFile.getAbsolutePath(), "t1", tableType); + + // insert data + List rows = flinkQuickstart.insertData(); + assertRowsEquals(rows, TestQuickstartData.DATA_SET_SOURCE_INSERT_LATEST_COMMIT); + + // query data + List rows1 = flinkQuickstart.queryData(); + assertRowsEquals(rows1, TestQuickstartData.DATA_SET_SOURCE_INSERT_LATEST_COMMIT); + + // update data + List rows2 = flinkQuickstart.updateData(); + assertRowsEquals(rows2, TestQuickstartData.DATA_SET_SOURCE_INSERT_LATEST_COMMIT); + } +} diff --git a/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestQuickstartData.java b/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestQuickstartData.java new file mode 100644 index 0000000000000..97a682c3a3903 --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestQuickstartData.java @@ -0,0 +1,429 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.examples.quickstart; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.data.binary.BinaryRowData; +import org.apache.flink.table.data.conversion.DataStructureConverter; +import org.apache.flink.table.data.conversion.DataStructureConverters; +import org.apache.flink.table.data.writer.BinaryRowWriter; +import org.apache.flink.table.data.writer.BinaryWriter; +import org.apache.flink.table.runtime.typeutils.InternalSerializers; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.types.Row; +import org.apache.flink.types.RowKind; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.config.HoodieCommonConfig; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner; +import org.apache.hudi.examples.quickstart.utils.QuickstartConfigurations; +import org.apache.parquet.Strings; +import org.apache.parquet.avro.AvroParquetReader; +import org.apache.parquet.hadoop.ParquetReader; + +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import 
java.util.Properties; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static junit.framework.TestCase.assertEquals; +import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath; +import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +/** + * Data set for testing, also some utilities to check the results. + */ +public class TestQuickstartData { + + public static List DATA_SET_INSERT_DUPLICATES = new ArrayList<>(); + + static { + IntStream.range(0, 5).forEach(i -> DATA_SET_INSERT_DUPLICATES.add( + insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 23, + TimestampData.fromEpochMillis(1), StringData.fromString("par1")))); + } + + public static List DATA_SET_INSERT_SAME_KEY = new ArrayList<>(); + + static { + IntStream.range(0, 5).forEach(i -> DATA_SET_INSERT_SAME_KEY.add( + insertRow(StringData.fromString("id1"), StringData.fromString("Danny"), 23, + TimestampData.fromEpochMillis(i), StringData.fromString("par1")))); + } + + // data set of source-file.json latest commit. 
+ public static List DATA_SET_SOURCE_INSERT_LATEST_COMMIT = Arrays.asList( + insertRow(StringData.fromString("id5"), StringData.fromString("Sophia"), 18, + TimestampData.fromEpochMillis(5000), StringData.fromString("par3")), + insertRow(StringData.fromString("id6"), StringData.fromString("Emma"), 20, + TimestampData.fromEpochMillis(6000), StringData.fromString("par3")), + insertRow(StringData.fromString("id7"), StringData.fromString("Bob"), 44, + TimestampData.fromEpochMillis(7000), StringData.fromString("par4")), + insertRow(StringData.fromString("id8"), StringData.fromString("Han"), 56, + TimestampData.fromEpochMillis(8000), StringData.fromString("par4")) + ); + + public static List DATA_SET_DISORDER_UPDATE_DELETE = Arrays.asList( + // DISORDER UPDATE + updateAfterRow(StringData.fromString("id1"), StringData.fromString("Danny"), 21, + TimestampData.fromEpochMillis(3), StringData.fromString("par1")), + updateAfterRow(StringData.fromString("id1"), StringData.fromString("Danny"), 20, + TimestampData.fromEpochMillis(2), StringData.fromString("par1")), + updateBeforeRow(StringData.fromString("id1"), StringData.fromString("Danny"), 23, + TimestampData.fromEpochMillis(1), StringData.fromString("par1")), + updateBeforeRow(StringData.fromString("id1"), StringData.fromString("Danny"), 20, + TimestampData.fromEpochMillis(2), StringData.fromString("par1")), + updateAfterRow(StringData.fromString("id1"), StringData.fromString("Danny"), 22, + TimestampData.fromEpochMillis(4), StringData.fromString("par1")), + updateBeforeRow(StringData.fromString("id1"), StringData.fromString("Danny"), 21, + TimestampData.fromEpochMillis(3), StringData.fromString("par1")), + // DISORDER DELETE + deleteRow(StringData.fromString("id1"), StringData.fromString("Danny"), 22, + TimestampData.fromEpochMillis(2), StringData.fromString("par1")) + ); + + public static List dataSetInsert(int... 
ids) { + List inserts = new ArrayList<>(); + Arrays.stream(ids).forEach(i -> inserts.add( + insertRow(StringData.fromString("id" + i), StringData.fromString("Danny"), 23, + TimestampData.fromEpochMillis(i), StringData.fromString("par1")))); + return inserts; + } + + private static Integer toIdSafely(Object id) { + if (id == null) { + return -1; + } + final String idStr = id.toString(); + if (idStr.startsWith("id")) { + return Integer.parseInt(idStr.substring(2)); + } + return -1; + } + + /** + * Returns string format of a list of RowData. + */ + public static String rowDataToString(List rows) { + DataStructureConverter converter = + DataStructureConverters.getConverter(QuickstartConfigurations.ROW_DATA_TYPE); + return rows.stream() + .sorted(Comparator.comparing(o -> toIdSafely(o.getString(0)))) + .map(row -> converter.toExternal(row).toString()) + .collect(Collectors.toList()).toString(); + } + + private static String toStringSafely(Object obj) { + return obj == null ? "null" : obj.toString(); + } + + /** + * Sort the {@code rows} using field at index 0 and asserts + * it equals with the expected string {@code expected}. + * + * @param rows Actual result rows + * @param expected Expected string of the sorted rows + */ + public static void assertRowsEquals(List rows, String expected) { + assertRowsEquals(rows, expected, false); + } + + /** + * Sort the {@code rows} using field at index 0 and asserts + * it equals with the expected string {@code expected}. 
+ * + * @param rows Actual result rows + * @param expected Expected string of the sorted rows + * @param withChangeFlag Whether compares with change flags + */ + public static void assertRowsEquals(List rows, String expected, boolean withChangeFlag) { + String rowsString = rows.stream() + .sorted(Comparator.comparing(o -> toStringSafely(o.getField(0)))) + .map(row -> { + final String rowStr = row.toString(); + if (withChangeFlag) { + return row.getKind().shortString() + "(" + rowStr + ")"; + } else { + return rowStr; + } + }) + .collect(Collectors.toList()).toString(); + assertThat(rowsString, is(expected)); + } + + /** + * Sort the {@code rows} using field at index {@code orderingPos} and asserts + * it equals with the expected string {@code expected}. + * + * @param rows Actual result rows + * @param expected Expected string of the sorted rows + * @param orderingPos Field position for ordering + */ + public static void assertRowsEquals(List rows, String expected, int orderingPos) { + String rowsString = rows.stream() + .sorted(Comparator.comparing(o -> toStringSafely(o.getField(orderingPos)))) + .collect(Collectors.toList()).toString(); + assertThat(rowsString, is(expected)); + } + + /** + * Sort the {@code rows} using field at index 0 and asserts + * it equals with the expected row data list {@code expected}. + * + * @param rows Actual result rows + * @param expected Expected row data list + */ + public static void assertRowsEquals(List rows, List expected) { + String rowsString = rows.stream() + .sorted(Comparator.comparing(o -> toIdSafely(o.getField(0)))) + .collect(Collectors.toList()).toString(); + assertThat(rowsString, is(rowDataToString(expected))); + } + + /** + * Sort the {@code rows} using field at index 0 and asserts + * it equals with the expected string {@code expected}. 
+ * + * @param rows Actual result rows + * @param expected Expected string of the sorted rows + */ + public static void assertRowDataEquals(List rows, String expected) { + String rowsString = rowDataToString(rows); + assertThat(rowsString, is(expected)); + } + + /** + * Sort the {@code rows} using field at index 0 and asserts + * it equals with the expected row data list {@code expected}. + * + * @param rows Actual result rows + * @param expected Expected row data list + */ + public static void assertRowDataEquals(List rows, List expected) { + String rowsString = rowDataToString(rows); + assertThat(rowsString, is(rowDataToString(expected))); + } + + /** + * Checks the source data set are written as expected. + * + *

    Note: Replace it with the Flink reader when it is supported. + * + * @param baseFile The file base to check, should be a directory + * @param expected The expected results mapping, the key should be the partition path + * and value should be values list with the key partition + */ + public static void checkWrittenData(File baseFile, Map expected) throws IOException { + checkWrittenData(baseFile, expected, 4); + } + + /** + * Checks the source data set are written as expected. + * + *

    Note: Replace it with the Flink reader when it is supported. + * + * @param baseFile The file base to check, should be a directory + * @param expected The expected results mapping, the key should be the partition path + * and value should be values list with the key partition + * @param partitions The expected partition number + */ + public static void checkWrittenData( + File baseFile, + Map expected, + int partitions) throws IOException { + assert baseFile.isDirectory(); + FileFilter filter = file -> !file.getName().startsWith("."); + File[] partitionDirs = baseFile.listFiles(filter); + assertNotNull(partitionDirs); + assertThat(partitionDirs.length, is(partitions)); + for (File partitionDir : partitionDirs) { + File[] dataFiles = partitionDir.listFiles(filter); + assertNotNull(dataFiles); + File latestDataFile = Arrays.stream(dataFiles) + .max(Comparator.comparing(f -> FSUtils.getCommitTime(f.getName()))) + .orElse(dataFiles[0]); + ParquetReader reader = AvroParquetReader + .builder(new Path(latestDataFile.getAbsolutePath())).build(); + List readBuffer = new ArrayList<>(); + GenericRecord nextRecord = reader.read(); + while (nextRecord != null) { + readBuffer.add(filterOutVariables(nextRecord)); + nextRecord = reader.read(); + } + readBuffer.sort(Comparator.naturalOrder()); + assertThat(readBuffer.toString(), is(expected.get(partitionDir.getName()))); + } + } + + /** + * Checks the MERGE_ON_READ source data are written as expected. + * + *

    Note: Replace it with the Flink reader when it is supported. + * + * @param fs The file system + * @param latestInstant The latest committed instant of current table + * @param baseFile The file base to check, should be a directory + * @param expected The expected results mapping, the key should be the partition path + * @param partitions The expected partition number + * @param schema The read schema + */ + public static void checkWrittenDataMOR( + FileSystem fs, + String latestInstant, + File baseFile, + Map expected, + int partitions, + Schema schema) { + assert baseFile.isDirectory() : "Base path should be a directory"; + FileFilter partitionFilter = file -> !file.getName().startsWith("."); + File[] partitionDirs = baseFile.listFiles(partitionFilter); + assertNotNull(partitionDirs); + assertThat(partitionDirs.length, is(partitions)); + for (File partitionDir : partitionDirs) { + File[] dataFiles = partitionDir.listFiles(file -> + file.getName().contains(".log.") && !file.getName().startsWith("..")); + assertNotNull(dataFiles); + HoodieMergedLogRecordScanner scanner = getScanner( + fs, baseFile.getPath(), Arrays.stream(dataFiles).map(File::getAbsolutePath) + .sorted(Comparator.naturalOrder()).collect(Collectors.toList()), + schema, latestInstant); + List readBuffer = scanner.getRecords().values().stream() + .map(hoodieRecord -> { + try { + // in case it is a delete + GenericRecord record = (GenericRecord) hoodieRecord.getData() + .getInsertValue(schema, new Properties()) + .orElse(null); + return record == null ? (String) null : filterOutVariables(record); + } catch (IOException e) { + throw new RuntimeException(e); + } + }) + .filter(Objects::nonNull) + .sorted(Comparator.naturalOrder()) + .collect(Collectors.toList()); + assertThat(readBuffer.toString(), is(expected.get(partitionDir.getName()))); + } + } + + /** + * Returns the scanner to read avro log files. 
+ */ + private static HoodieMergedLogRecordScanner getScanner( + FileSystem fs, + String basePath, + List logPaths, + Schema readSchema, + String instant) { + HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder() + .withFileSystem(fs) + .withBasePath(basePath) + .withLogFilePaths(logPaths) + .withReaderSchema(readSchema) + .withLatestInstantTime(instant) + .withReadBlocksLazily(false) + .withReverseReader(false) + .withBufferSize(16 * 1024 * 1024) + .withMaxMemorySizeInBytes(1024 * 1024L) + .withSpillableMapBasePath("/tmp/") + .withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue()) + .withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue()); + if (!isNullOrEmpty(logPaths)) { + logRecordScannerBuilder + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(logPaths.get(0)).getParent())); + } + return logRecordScannerBuilder.build(); + } + + /** + * Filter out the variables like file name. + */ + private static String filterOutVariables(GenericRecord genericRecord) { + List fields = new ArrayList<>(); + fields.add(genericRecord.get("_hoodie_record_key").toString()); + fields.add(genericRecord.get("_hoodie_partition_path").toString()); + fields.add(genericRecord.get("uuid").toString()); + fields.add(genericRecord.get("name").toString()); + fields.add(genericRecord.get("age").toString()); + fields.add(genericRecord.get("ts").toString()); + fields.add(genericRecord.get("partition").toString()); + return Strings.join(fields, ","); + } + + public static BinaryRowData insertRow(Object... fields) { + return insertRow(QuickstartConfigurations.ROW_TYPE, fields); + } + + public static BinaryRowData insertRow(RowType rowType, Object... 
fields) { + LogicalType[] types = rowType.getFields().stream().map(RowType.RowField::getType) + .toArray(LogicalType[]::new); + assertEquals( + "Filed count inconsistent with type information", + fields.length, + types.length); + BinaryRowData row = new BinaryRowData(fields.length); + BinaryRowWriter writer = new BinaryRowWriter(row); + writer.reset(); + for (int i = 0; i < fields.length; i++) { + Object field = fields[i]; + if (field == null) { + writer.setNullAt(i); + } else { + BinaryWriter.write(writer, i, field, types[i], InternalSerializers.create(types[i])); + } + } + writer.complete(); + return row; + } + + private static BinaryRowData deleteRow(Object... fields) { + BinaryRowData rowData = insertRow(fields); + rowData.setRowKind(RowKind.DELETE); + return rowData; + } + + private static BinaryRowData updateBeforeRow(Object... fields) { + BinaryRowData rowData = insertRow(fields); + rowData.setRowKind(RowKind.UPDATE_BEFORE); + return rowData; + } + + private static BinaryRowData updateAfterRow(Object... fields) { + BinaryRowData rowData = insertRow(fields); + rowData.setRowKind(RowKind.UPDATE_AFTER); + return rowData; + } +} diff --git a/hudi-examples/hudi-examples-flink/src/test/resources/META-INF/services/org.apache.flink.table.factories.Factory b/hudi-examples/hudi-examples-flink/src/test/resources/META-INF/services/org.apache.flink.table.factories.Factory new file mode 100644 index 0000000000000..27a137292b388 --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/test/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.hudi.examples.quickstart.factory.ContinuousFileSourceFactory +org.apache.hudi.examples.quickstart.factory.CollectSinkTableFactory diff --git a/hudi-examples/hudi-examples-flink/src/test/resources/log4j-surefire-quiet.properties b/hudi-examples/hudi-examples-flink/src/test/resources/log4j-surefire-quiet.properties new file mode 100644 index 0000000000000..2b94ea2903067 --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/test/resources/log4j-surefire-quiet.properties @@ -0,0 +1,30 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### +log4j.rootLogger=WARN, CONSOLE +log4j.logger.org.apache.hudi=DEBUG +log4j.logger.org.apache.hadoop.hbase=ERROR + +# CONSOLE is set to be a ConsoleAppender. +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +# CONSOLE uses PatternLayout. 
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c %x - %m%n +log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter +log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true +log4j.appender.CONSOLE.filter.a.LevelMin=WARN +log4j.appender.CONSOLE.filter.a.LevelMax=FATAL \ No newline at end of file diff --git a/hudi-examples/hudi-examples-flink/src/test/resources/log4j-surefire.properties b/hudi-examples/hudi-examples-flink/src/test/resources/log4j-surefire.properties new file mode 100644 index 0000000000000..8dcd17f303f6b --- /dev/null +++ b/hudi-examples/hudi-examples-flink/src/test/resources/log4j-surefire.properties @@ -0,0 +1,31 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### +log4j.rootLogger=INFO, CONSOLE +log4j.logger.org.apache=INFO +log4j.logger.org.apache.hudi=DEBUG +log4j.logger.org.apache.hadoop.hbase=ERROR + +# A1 is set to be a ConsoleAppender. +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +# A1 uses PatternLayout. 
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n +log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter +log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true +log4j.appender.CONSOLE.filter.a.LevelMin=INFO +log4j.appender.CONSOLE.filter.a.LevelMax=FATAL diff --git a/hudi-examples/hudi-examples-java/pom.xml b/hudi-examples/hudi-examples-java/pom.xml new file mode 100644 index 0000000000000..965cdef972194 --- /dev/null +++ b/hudi-examples/hudi-examples-java/pom.xml @@ -0,0 +1,129 @@ + + + + + hudi-examples + org.apache.hudi + 0.12.0-SNAPSHOT + + 4.0.0 + + hudi-examples-java + + + ${project.parent.basedir} + true + + + + + + src/main/resources + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-dependencies + prepare-package + + copy-dependencies + + + ${project.build.directory}/lib + true + true + true + + + + + + net.alchim31.maven + scala-maven-plugin + + + scala-compile-first + process-resources + + add-source + compile + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + compile + + compile + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + test-compile + + + + false + + + + org.apache.rat + apache-rat-plugin + + + + + + + org.apache.hudi + hudi-examples-common + ${project.version} + + + + org.apache.hudi + hudi-client-common + ${project.version} + + + + org.apache.hudi + hudi-java-client + ${project.version} + + + diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java b/hudi-examples/hudi-examples-java/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java similarity index 100% rename from hudi-examples/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java rename to hudi-examples/hudi-examples-java/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java diff --git 
a/hudi-examples/hudi-examples-spark/pom.xml b/hudi-examples/hudi-examples-spark/pom.xml new file mode 100644 index 0000000000000..90509e6b6a29d --- /dev/null +++ b/hudi-examples/hudi-examples-spark/pom.xml @@ -0,0 +1,279 @@ + + + + + hudi-examples + org.apache.hudi + 0.12.0-SNAPSHOT + + 4.0.0 + + hudi-examples-spark + + + ${project.parent.basedir} + true + + + + + + src/main/resources + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-dependencies + prepare-package + + copy-dependencies + + + ${project.build.directory}/lib + true + true + true + + + + + + net.alchim31.maven + scala-maven-plugin + + + scala-compile-first + process-resources + + add-source + compile + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + compile + + compile + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + test-compile + + + + false + + + + org.apache.rat + apache-rat-plugin + + + + + + + org.scala-lang + scala-library + ${scala.version} + + + + org.apache.hudi + hudi-examples-common + ${project.version} + + + * + * + + + + + + org.apache.hudi + hudi-cli + ${project.version} + + + + org.apache.hudi + hudi-client-common + ${project.version} + + + + org.apache.hudi + hudi-java-client + ${project.version} + + + + org.apache.hudi + hudi-spark-client + ${project.version} + + + + org.apache.hudi + hudi-utilities_${scala.binary.version} + ${project.version} + + + + org.apache.hudi + hudi-spark_${scala.binary.version} + ${project.version} + + + + org.apache.hudi + hudi-spark-common_${scala.binary.version} + ${project.version} + + + + org.apache.hudi + hudi-hadoop-mr + ${project.version} + + + + org.apache.hudi + hudi-timeline-service + ${project.version} + + + + + org.apache.spark + spark-core_${scala.binary.version} + + + org.apache.spark + spark-sql_${scala.binary.version} + + + + + org.apache.parquet + parquet-hadoop + ${parquet.version} + + + + + org.apache.avro + avro + + + + org.apache.parquet + parquet-avro + + + + + 
${hive.groupid} + hive-common + + + ${hive.groupid} + hive-exec + ${hive.version} + provided + ${hive.exec.classifier} + + + javax.mail + mail + + + org.eclipse.jetty.aggregate + * + + + + + + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.vintage + junit-vintage-engine + test + + + org.junit.jupiter + junit-jupiter-params + test + + + org.mockito + mockito-junit-jupiter + test + + + + + org.apache.hudi + hudi-client-common + ${project.version} + tests + test-jar + test + + + org.apache.hudi + hudi-spark-client + ${project.version} + tests + test-jar + test + + + diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java similarity index 99% rename from hudi-examples/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java rename to hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java index 4486a4286c43f..c974d9ad73313 100644 --- a/hudi-examples/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java +++ b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/ExampleDataSchemaProvider.java @@ -23,7 +23,6 @@ import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.spark.api.java.JavaSparkContext; - /** * the example SchemaProvider of example json data from uber. 
*/ diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/common/HoodieExampleSparkUtils.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/HoodieExampleSparkUtils.java similarity index 100% rename from hudi-examples/src/main/java/org/apache/hudi/examples/common/HoodieExampleSparkUtils.java rename to hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/HoodieExampleSparkUtils.java diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/common/IdentityTransformer.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/IdentityTransformer.java similarity index 100% rename from hudi-examples/src/main/java/org/apache/hudi/examples/common/IdentityTransformer.java rename to hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/IdentityTransformer.java diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/common/RandomJsonSource.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/RandomJsonSource.java similarity index 100% rename from hudi-examples/src/main/java/org/apache/hudi/examples/common/RandomJsonSource.java rename to hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/RandomJsonSource.java diff --git a/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java new file mode 100644 index 0000000000000..9f8e29d68773f --- /dev/null +++ b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.examples.quickstart; + +import org.apache.hudi.QuickstartUtils; +import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.examples.common.HoodieExampleDataGenerator; +import org.apache.hudi.examples.common.HoodieExampleSparkUtils; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; + +import java.util.List; + +import static org.apache.hudi.config.HoodieWriteConfig.TBL_NAME; +import static org.apache.spark.sql.SaveMode.Append; +import static org.apache.spark.sql.SaveMode.Overwrite; + +public final class HoodieSparkQuickstart { + + private HoodieSparkQuickstart() { + } + + public static void main(String[] args) { + if (args.length < 2) { + System.err.println("Usage: HoodieWriteClientExample "); + System.exit(1); + } + String tablePath = args[0]; + String tableName = args[1]; + + SparkSession spark = HoodieExampleSparkUtils.defaultSparkSession("Hudi Spark basic example"); + SparkConf sparkConf = HoodieExampleSparkUtils.defaultSparkConf("hoodie-client-example"); + + try 
(JavaSparkContext jsc = new JavaSparkContext(sparkConf)) { + final HoodieExampleDataGenerator dataGen = new HoodieExampleDataGenerator<>(); + + insertData(spark, jsc, tablePath, tableName, dataGen); + updateData(spark, jsc, tablePath, tableName, dataGen); + queryData(spark, jsc, tablePath, tableName, dataGen); + + incrementalQuery(spark, tablePath, tableName); + pointInTimeQuery(spark, tablePath, tableName); + + delete(spark, tablePath, tableName); + deleteByPartition(spark, tablePath, tableName); + } + } + + /** + * Generate some new trips, load them into a DataFrame and write the DataFrame into the Hudi dataset as below. + */ + public static void insertData(SparkSession spark, JavaSparkContext jsc, String tablePath, String tableName, + HoodieExampleDataGenerator dataGen) { + String commitTime = Long.toString(System.currentTimeMillis()); + List inserts = dataGen.convertToStringList(dataGen.generateInserts(commitTime, 20)); + Dataset df = spark.read().json(jsc.parallelize(inserts, 1)); + df.write().format("org.apache.hudi") + .options(QuickstartUtils.getQuickstartWriteConfigs()) + .option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts") + .option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid") + .option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partitionpath") + .option(TBL_NAME.key(), tableName) + .mode(Overwrite) + .save(tablePath); + } + + /** + * Load the data files into a DataFrame. 
+ */ + public static void queryData(SparkSession spark, JavaSparkContext jsc, String tablePath, String tableName, + HoodieExampleDataGenerator dataGen) { + Dataset roViewDF = spark + .read() + .format("org.apache.hudi") + .load(tablePath + "/*/*/*/*"); + + roViewDF.createOrReplaceTempView("hudi_ro_table"); + + spark.sql("select fare, begin_lon, begin_lat, ts from hudi_ro_table where fare > 20.0").show(); + // +-----------------+-------------------+-------------------+---+ + // | fare| begin_lon| begin_lat| ts| + // +-----------------+-------------------+-------------------+---+ + // |98.88075495133515|0.39556048623031603|0.17851135255091155|0.0| + // ... + + spark.sql( + "select _hoodie_commit_time, _hoodie_record_key, _hoodie_partition_path, rider, driver, fare from hudi_ro_table") + .show(); + // +-------------------+--------------------+----------------------+-------------------+--------------------+------------------+ + // |_hoodie_commit_time| _hoodie_record_key|_hoodie_partition_path| rider| driver| fare| + // +-------------------+--------------------+----------------------+-------------------+--------------------+------------------+ + // | 20191231181501|31cafb9f-0196-4b1...| 2020/01/02|rider-1577787297889|driver-1577787297889| 98.88075495133515| + // ... + } + + /** + * This is similar to inserting new data. Generate updates to existing trips using the data generator, + * load into a DataFrame and write DataFrame into the hudi dataset. 
+ */ + public static void updateData(SparkSession spark, JavaSparkContext jsc, String tablePath, String tableName, + HoodieExampleDataGenerator dataGen) { + + String commitTime = Long.toString(System.currentTimeMillis()); + List updates = dataGen.convertToStringList(dataGen.generateUpdates(commitTime, 10)); + Dataset df = spark.read().json(jsc.parallelize(updates, 1)); + df.write().format("org.apache.hudi") + .options(QuickstartUtils.getQuickstartWriteConfigs()) + .option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts") + .option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid") + .option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partitionpath") + .option(TBL_NAME.key(), tableName) + .mode(Append) + .save(tablePath); + } + + /** + * Deleta data based in data information. + */ + public static void delete(SparkSession spark, String tablePath, String tableName) { + + Dataset roViewDF = spark.read().format("org.apache.hudi").load(tablePath + "/*/*/*/*"); + roViewDF.createOrReplaceTempView("hudi_ro_table"); + Dataset df = spark.sql("select uuid, partitionpath, ts from hudi_ro_table limit 2"); + + df.write().format("org.apache.hudi") + .options(QuickstartUtils.getQuickstartWriteConfigs()) + .option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts") + .option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "uuid") + .option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partitionpath") + .option(TBL_NAME.key(), tableName) + .option("hoodie.datasource.write.operation", WriteOperationType.DELETE.value()) + .mode(Append) + .save(tablePath); + } + + /** + * Delete the data of a single or multiple partitions. 
+ */ + public static void deleteByPartition(SparkSession spark, String tablePath, String tableName) { + Dataset df = spark.emptyDataFrame(); + df.write().format("org.apache.hudi") + .options(QuickstartUtils.getQuickstartWriteConfigs()) + .option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts") + .option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid") + .option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partitionpath") + .option(TBL_NAME.key(), tableName) + .option("hoodie.datasource.write.operation", WriteOperationType.DELETE.value()) + .option("hoodie.datasource.write.partitions.to.delete", + String.join(", ", HoodieExampleDataGenerator.DEFAULT_PARTITION_PATHS)) + .mode(Append) + .save(tablePath); + } + + /** + * Hudi also provides capability to obtain a stream of records that changed since given commit timestamp. + * This can be achieved using Hudi’s incremental view and providing a begin time from which changes need to be streamed. + * We do not need to specify endTime, if we want all changes after the given commit (as is the common case). + */ + public static void incrementalQuery(SparkSession spark, String tablePath, String tableName) { + List commits = + spark.sql("select distinct(_hoodie_commit_time) as commitTime from hudi_ro_table order by commitTime") + .toJavaRDD() + .map((Function) row -> row.getString(0)) + .take(50); + + String beginTime = commits.get(commits.size() - 2); // commit time we are interested in + + // incrementally query data + Dataset incViewDF = spark + .read() + .format("org.apache.hudi") + .option("hoodie.datasource.query.type", "incremental") + .option("hoodie.datasource.read.begin.instanttime", beginTime) + .load(tablePath); + + incViewDF.createOrReplaceTempView("hudi_incr_table"); + spark.sql("select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_incr_table where fare > 20.0") + .show(); + } + + /** + * Lets look at how to query data as of a specific time. 
+ * The specific time can be represented by pointing endTime to a specific commit time + * and beginTime to “000” (denoting earliest possible commit time). + */ + public static void pointInTimeQuery(SparkSession spark, String tablePath, String tableName) { + List commits = + spark.sql("select distinct(_hoodie_commit_time) as commitTime from hudi_ro_table order by commitTime") + .toJavaRDD() + .map((Function) row -> row.getString(0)) + .take(50); + String beginTime = "000"; // Represents all commits > this time. + String endTime = commits.get(commits.size() - 2); // commit time we are interested in + + //incrementally query data + Dataset incViewDF = spark.read().format("org.apache.hudi") + .option("hoodie.datasource.query.type", "incremental") + .option("hoodie.datasource.read.begin.instanttime", beginTime) + .option("hoodie.datasource.read.end.instanttime", endTime) + .load(tablePath); + + incViewDF.createOrReplaceTempView("hudi_incr_table"); + spark.sql("select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_incr_table where fare > 20.0") + .show(); + } +} diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieSparkBootstrapExample.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/spark/HoodieSparkBootstrapExample.java similarity index 100% rename from hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieSparkBootstrapExample.java rename to hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/spark/HoodieSparkBootstrapExample.java diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java similarity index 100% rename from hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java rename to 
hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java diff --git a/hudi-examples/src/main/resources/delta-streamer-config/dfs/source-file.json b/hudi-examples/hudi-examples-spark/src/main/resources/delta-streamer-config/dfs/source-file.json similarity index 100% rename from hudi-examples/src/main/resources/delta-streamer-config/dfs/source-file.json rename to hudi-examples/hudi-examples-spark/src/main/resources/delta-streamer-config/dfs/source-file.json diff --git a/hudi-examples/src/main/resources/delta-streamer-config/kafka/kafka-source.properties b/hudi-examples/hudi-examples-spark/src/main/resources/delta-streamer-config/kafka/kafka-source.properties similarity index 100% rename from hudi-examples/src/main/resources/delta-streamer-config/kafka/kafka-source.properties rename to hudi-examples/hudi-examples-spark/src/main/resources/delta-streamer-config/kafka/kafka-source.properties diff --git a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala b/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala similarity index 99% rename from hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala rename to hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala index 77b3885e3cf7a..33c085cba3eb6 100644 --- a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala +++ b/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala @@ -172,7 +172,7 @@ object HoodieDataSourceExample { * This can be achieved using Hudi’s incremental view and providing a begin time from which changes need to be streamed. * We do not need to specify endTime, if we want all changes after the given commit (as is the common case). 
*/ - def incrementalQuery(spark: SparkSession, tablePath: String, tableName: String) { + def incrementalQuery(spark: SparkSession, tablePath: String, tableName: String): Unit = { import spark.implicits._ val commits = spark.sql("select distinct(_hoodie_commit_time) as commitTime from hudi_ro_table order by commitTime").map(k => k.getString(0)).take(50) val beginTime = commits(commits.length - 2) // commit time we are interested in @@ -193,7 +193,7 @@ object HoodieDataSourceExample { * The specific time can be represented by pointing endTime to a specific commit time * and beginTime to “000” (denoting earliest possible commit time). */ - def pointInTimeQuery(spark: SparkSession, tablePath: String, tableName: String) { + def pointInTimeQuery(spark: SparkSession, tablePath: String, tableName: String): Unit = { import spark.implicits._ val commits = spark.sql("select distinct(_hoodie_commit_time) as commitTime from hudi_ro_table order by commitTime").map(k => k.getString(0)).take(50) val beginTime = "000" // Represents all commits > this time. 
diff --git a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala b/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala similarity index 99% rename from hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala rename to hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala index 2d96f9c6c39dd..8a2c8715b30eb 100644 --- a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala +++ b/hudi-examples/hudi-examples-spark/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala @@ -55,6 +55,7 @@ object HoodieMorCompactionJob { val dataGen = new HoodieExampleDataGenerator[HoodieAvroPayload] val tablePath = args(0) val tableName = args(1) + insertData(spark, tablePath, tableName, dataGen, HoodieTableType.MERGE_ON_READ.name()) updateData(spark, tablePath, tableName, dataGen, HoodieTableType.MERGE_ON_READ.name()) val cfg = HoodieWriteConfig.newBuilder() diff --git a/hudi-examples/hudi-examples-spark/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieSparkQuickstart.java b/hudi-examples/hudi-examples-spark/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieSparkQuickstart.java new file mode 100644 index 0000000000000..212dcc440933f --- /dev/null +++ b/hudi-examples/hudi-examples-spark/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieSparkQuickstart.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.examples.quickstart; + +import org.apache.hudi.client.HoodieReadClient; +import org.apache.hudi.client.SparkRDDWriteClient; +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.examples.common.HoodieExampleDataGenerator; +import org.apache.hudi.testutils.providers.SparkProvider; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.util.Utils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; +import java.nio.file.Paths; + +public class TestHoodieSparkQuickstart implements SparkProvider { + protected static transient HoodieSparkEngineContext context; + + private static transient SparkSession spark; + private static transient SQLContext sqlContext; + private static transient JavaSparkContext jsc; + + /** + * An indicator of the initialization status. 
+ */ + protected boolean initialized = false; + @TempDir + protected java.nio.file.Path tempDir; + + private static final HoodieExampleDataGenerator DATA_GEN = new HoodieExampleDataGenerator<>(); + + @Override + public SparkSession spark() { + return spark; + } + + @Override + public SQLContext sqlContext() { + return sqlContext; + } + + @Override + public JavaSparkContext jsc() { + return jsc; + } + + @Override + public HoodieSparkEngineContext context() { + return context; + } + + public String basePath() { + return tempDir.toAbsolutePath().toString(); + } + + public String tablePath(String tableName) { + return Paths.get(basePath(), tableName).toString(); + } + + @BeforeEach + public synchronized void runBeforeEach() { + initialized = spark != null; + if (!initialized) { + SparkConf sparkConf = conf(); + SparkRDDWriteClient.registerClasses(sparkConf); + HoodieReadClient.addHoodieSupport(sparkConf); + spark = SparkSession.builder().config(sparkConf).getOrCreate(); + sqlContext = spark.sqlContext(); + jsc = new JavaSparkContext(spark.sparkContext()); + context = new HoodieSparkEngineContext(jsc); + } + } + + @Test + public void testHoodieSparkQuickstart() { + String tableName = "spark_quick_start"; + String tablePath = tablePath(tableName); + + try { + HoodieSparkQuickstart.insertData(spark, jsc, tablePath, tableName, DATA_GEN); + HoodieSparkQuickstart.updateData(spark, jsc, tablePath, tableName, DATA_GEN); + + HoodieSparkQuickstart.queryData(spark, jsc, tablePath, tableName, DATA_GEN); + HoodieSparkQuickstart.incrementalQuery(spark, tablePath, tableName); + HoodieSparkQuickstart.pointInTimeQuery(spark, tablePath, tableName); + + HoodieSparkQuickstart.delete(spark, tablePath, tableName); + HoodieSparkQuickstart.deleteByPartition(spark, tablePath, tableName); + } finally { + Utils.deleteRecursively(new File(tablePath)); + } + } +} diff --git a/hudi-examples/hudi-examples-spark/src/test/resources/log4j-surefire-quiet.properties 
b/hudi-examples/hudi-examples-spark/src/test/resources/log4j-surefire-quiet.properties new file mode 100644 index 0000000000000..2b94ea2903067 --- /dev/null +++ b/hudi-examples/hudi-examples-spark/src/test/resources/log4j-surefire-quiet.properties @@ -0,0 +1,30 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### +log4j.rootLogger=WARN, CONSOLE +log4j.logger.org.apache.hudi=DEBUG +log4j.logger.org.apache.hadoop.hbase=ERROR + +# CONSOLE is set to be a ConsoleAppender. +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +# CONSOLE uses PatternLayout. 
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c %x - %m%n +log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter +log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true +log4j.appender.CONSOLE.filter.a.LevelMin=WARN +log4j.appender.CONSOLE.filter.a.LevelMax=FATAL \ No newline at end of file diff --git a/hudi-examples/hudi-examples-spark/src/test/resources/log4j-surefire.properties b/hudi-examples/hudi-examples-spark/src/test/resources/log4j-surefire.properties new file mode 100644 index 0000000000000..8dcd17f303f6b --- /dev/null +++ b/hudi-examples/hudi-examples-spark/src/test/resources/log4j-surefire.properties @@ -0,0 +1,31 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### +log4j.rootLogger=INFO, CONSOLE +log4j.logger.org.apache=INFO +log4j.logger.org.apache.hudi=DEBUG +log4j.logger.org.apache.hadoop.hbase=ERROR + +# A1 is set to be a ConsoleAppender. +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +# A1 uses PatternLayout. 
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n +log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter +log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true +log4j.appender.CONSOLE.filter.a.LevelMin=INFO +log4j.appender.CONSOLE.filter.a.LevelMax=FATAL diff --git a/hudi-examples/pom.xml b/hudi-examples/pom.xml index 2ea284f203209..1d6c94954c08a 100644 --- a/hudi-examples/pom.xml +++ b/hudi-examples/pom.xml @@ -20,209 +20,18 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-examples - jar + pom - - ${project.parent.basedir} - true - + + hudi-examples-common + hudi-examples-spark + hudi-examples-flink + hudi-examples-java + - - - - src/main/resources - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - copy-dependencies - prepare-package - - copy-dependencies - - - ${project.build.directory}/lib - true - true - true - - - - - - net.alchim31.maven - scala-maven-plugin - - - scala-compile-first - process-resources - - add-source - compile - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - compile - - compile - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - test-jar - - test-compile - - - - false - - - - org.apache.rat - apache-rat-plugin - - - - - - - - org.scala-lang - scala-library - ${scala.version} - - - - org.apache.hudi - hudi-common - ${project.version} - - - - org.apache.hudi - hudi-cli - ${project.version} - - - - org.apache.hudi - hudi-client-common - ${project.version} - - - - org.apache.hudi - hudi-java-client - ${project.version} - - - - org.apache.hudi - hudi-spark-client - ${project.version} - - - - org.apache.hudi - hudi-utilities_${scala.binary.version} - ${project.version} - - - - org.apache.hudi - hudi-spark_${scala.binary.version} - ${project.version} - - - - org.apache.hudi - hudi-hadoop-mr - ${project.version} - - - - org.apache.hudi - hudi-timeline-service - 
${project.version} - - - - - org.apache.spark - spark-core_${scala.binary.version} - - - org.apache.spark - spark-sql_${scala.binary.version} - - - org.apache.spark - spark-avro_${scala.binary.version} - - - - - org.apache.parquet - parquet-hadoop - ${parquet.version} - - - - - org.apache.avro - avro - - - - org.apache.parquet - parquet-avro - - - - - ${hive.groupid} - hive-common - - - ${hive.groupid} - hive-exec - ${hive.version} - provided - ${hive.exec.classifier} - - - javax.mail - mail - - - org.eclipse.jetty.aggregate - * - - - - - diff --git a/hudi-flink-datasource/hudi-flink/pom.xml b/hudi-flink-datasource/hudi-flink/pom.xml index bd05e7c10a4d7..97288d19cd35c 100644 --- a/hudi-flink-datasource/hudi-flink/pom.xml +++ b/hudi-flink-datasource/hudi-flink/pom.xml @@ -22,17 +22,17 @@ hudi-flink-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-flink - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT jar ${project.parent.parent.basedir} - 1.11.1 + ${flink.format.parquet.version} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java index cd5c2a70e1d48..e2be7d364b77f 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java @@ -459,17 +459,17 @@ private FlinkOptions() { .withDescription("Timeout limit for a writer task after it finishes a checkpoint and\n" + "waits for the instant commit success, only for internal use"); - public static final ConfigOption WRITE_BULK_INSERT_SHUFFLE_BY_PARTITION = ConfigOptions - .key("write.bulk_insert.shuffle_by_partition") + public static final ConfigOption WRITE_BULK_INSERT_SHUFFLE_INPUT = ConfigOptions + .key("write.bulk_insert.shuffle_input") .booleanType() .defaultValue(true) - .withDescription("Whether to shuffle the 
inputs by partition path for bulk insert tasks, default true"); + .withDescription("Whether to shuffle the inputs by specific fields for bulk insert tasks, default true"); - public static final ConfigOption WRITE_BULK_INSERT_SORT_BY_PARTITION = ConfigOptions - .key("write.bulk_insert.sort_by_partition") + public static final ConfigOption WRITE_BULK_INSERT_SORT_INPUT = ConfigOptions + .key("write.bulk_insert.sort_input") .booleanType() .defaultValue(true) - .withDescription("Whether to sort the inputs by partition path for bulk insert tasks, default true"); + .withDescription("Whether to sort the inputs by specific fields for bulk insert tasks, default true"); public static final ConfigOption WRITE_SORT_MEMORY = ConfigOptions .key("write.sort.memory") @@ -671,7 +671,7 @@ private FlinkOptions() { public static final ConfigOption HIVE_SYNC_SUPPORT_TIMESTAMP = ConfigOptions .key("hive_sync.support_timestamp") .booleanType() - .defaultValue(false) + .defaultValue(true) .withDescription("INT64 with original type TIMESTAMP_MICROS is converted to hive timestamp type.\n" + "Disabled by default for backward compatibility."); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java index c4f2e771c5baf..b5ec08a583d43 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java @@ -30,7 +30,6 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.OptionsResolver; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.sink.event.CommitAckEvent; import org.apache.hudi.sink.event.WriteMetadataEvent; import org.apache.hudi.sink.meta.CkpMetadata; import org.apache.hudi.sink.utils.HiveSyncContext; @@ 
-43,7 +42,6 @@ import org.apache.flink.runtime.jobgraph.OperatorID; import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; import org.apache.flink.runtime.operators.coordination.OperatorEvent; -import org.apache.flink.runtime.operators.coordination.TaskNotRunningException; import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -431,31 +429,6 @@ private void handleWriteMetaEvent(WriteMetadataEvent event) { addEventToBuffer(event); } - /** - * The coordinator reuses the instant if there is no data for this round of checkpoint, - * sends the commit ack events to unblock the flushing. - */ - private void sendCommitAckEvents(long checkpointId) { - CompletableFuture[] futures = Arrays.stream(this.gateways).filter(Objects::nonNull) - .map(gw -> gw.sendEvent(CommitAckEvent.getInstance(checkpointId))) - .toArray(CompletableFuture[]::new); - CompletableFuture.allOf(futures).whenComplete((resp, error) -> { - if (!sendToFinishedTasks(error)) { - throw new HoodieException("Error while waiting for the commit ack events to finish sending", error); - } - }); - } - - /** - * Decides whether the given exception is caused by sending events to FINISHED tasks. - * - *

    Ugly impl: the exception may change in the future. - */ - private static boolean sendToFinishedTasks(Throwable throwable) { - return throwable.getCause() instanceof TaskNotRunningException - || throwable.getCause().getMessage().contains("running"); - } - /** * Commits the instant. */ @@ -483,8 +456,6 @@ private boolean commitInstant(String instant, long checkpointId) { if (writeResults.size() == 0) { // No data has written, reset the buffer and returns early reset(); - // Send commit ack event to the write function to unblock the flushing - sendCommitAckEvents(checkpointId); return false; } doCommit(instant, writeResults); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bucket/BucketBulkInsertWriterHelper.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bucket/BucketBulkInsertWriterHelper.java new file mode 100644 index 0000000000000..5c44581c94836 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bucket/BucketBulkInsertWriterHelper.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.sink.bucket; + +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.index.bucket.BucketIdentifier; +import org.apache.hudi.io.storage.row.HoodieRowDataCreateHandle; +import org.apache.hudi.sink.bulk.BulkInsertWriterHelper; +import org.apache.hudi.sink.bulk.RowDataKeyGen; +import org.apache.hudi.sink.bulk.sort.SortOperatorGen; +import org.apache.hudi.table.HoodieTable; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Map; + +/** + * Helper class for bucket index bulk insert used by Flink. + */ +public class BucketBulkInsertWriterHelper extends BulkInsertWriterHelper { + private static final Logger LOG = LoggerFactory.getLogger(BucketBulkInsertWriterHelper.class); + public static final String FILE_GROUP_META_FIELD = "_fg"; + + private final int recordArity; + + private String lastFileId; // for efficient code path + + public BucketBulkInsertWriterHelper(Configuration conf, HoodieTable hoodieTable, HoodieWriteConfig writeConfig, + String instantTime, int taskPartitionId, long taskId, long taskEpochId, RowType rowType) { + super(conf, hoodieTable, writeConfig, instantTime, taskPartitionId, taskId, taskEpochId, rowType); + this.recordArity = rowType.getFieldCount(); + } + + public void write(RowData tuple) throws IOException { + try { + RowData record = tuple.getRow(1, this.recordArity); + String recordKey = keyGen.getRecordKey(record); + String partitionPath = keyGen.getPartitionPath(record); + String fileId = tuple.getString(0).toString(); + if ((lastFileId == null) || !lastFileId.equals(fileId)) { + 
LOG.info("Creating new file for partition path " + partitionPath); + handle = getRowCreateHandle(partitionPath, fileId); + lastFileId = fileId; + } + handle.write(recordKey, partitionPath, record); + } catch (Throwable throwable) { + LOG.error("Global error thrown while trying to write records in HoodieRowDataCreateHandle", throwable); + throw throwable; + } + } + + private HoodieRowDataCreateHandle getRowCreateHandle(String partitionPath, String fileId) throws IOException { + if (!handles.containsKey(fileId)) { // if there is no handle corresponding to the fileId + if (this.isInputSorted) { + // if records are sorted, we can close all existing handles + close(); + } + HoodieRowDataCreateHandle rowCreateHandle = new HoodieRowDataCreateHandle(hoodieTable, writeConfig, partitionPath, fileId, + instantTime, taskPartitionId, taskId, taskEpochId, rowType); + handles.put(fileId, rowCreateHandle); + } + return handles.get(fileId); + } + + public static SortOperatorGen getFileIdSorterGen(RowType rowType) { + return new SortOperatorGen(rowType, new String[] {FILE_GROUP_META_FIELD}); + } + + private static String getFileId(Map bucketIdToFileId, RowDataKeyGen keyGen, RowData record, String indexKeys, int numBuckets) { + String recordKey = keyGen.getRecordKey(record); + String partition = keyGen.getPartitionPath(record); + final int bucketNum = BucketIdentifier.getBucketId(recordKey, indexKeys, numBuckets); + String bucketId = partition + bucketNum; + return bucketIdToFileId.computeIfAbsent(bucketId, k -> BucketIdentifier.newBucketFileIdPrefix(bucketNum)); + } + + public static RowData rowWithFileId(Map bucketIdToFileId, RowDataKeyGen keyGen, RowData record, String indexKeys, int numBuckets) { + final String fileId = getFileId(bucketIdToFileId, keyGen, record, indexKeys, numBuckets); + return GenericRowData.of(StringData.fromString(fileId), record); + } + + public static RowType rowTypeWithFileId(RowType rowType) { + LogicalType[] types = new LogicalType[] 
{DataTypes.STRING().getLogicalType(), rowType}; + String[] names = new String[] {FILE_GROUP_META_FIELD, "record"}; + return RowType.of(types, names); + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bucket/BucketStreamWriteFunction.java similarity index 57% rename from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bucket/BucketStreamWriteFunction.java index 4c9e4dc25912b..1456e8882f024 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bucket/BucketStreamWriteFunction.java @@ -16,17 +16,14 @@ * limitations under the License. */ -package org.apache.hudi.sink; +package org.apache.hudi.sink.bucket; -import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; -import org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.hudi.common.util.Option; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.index.bucket.BucketIdentifier; -import org.apache.hudi.table.HoodieFlinkTable; +import org.apache.hudi.sink.StreamWriteFunction; import org.apache.flink.configuration.Configuration; import org.apache.flink.runtime.state.FunctionInitializationContext; @@ -38,9 +35,8 @@ import java.io.IOException; import java.util.HashMap; import java.util.HashSet; -import java.util.List; - -import static java.util.stream.Collectors.toList; +import java.util.Map; +import java.util.Set; /** * A stream write function with bucket hash index. 
@@ -55,27 +51,34 @@ public class BucketStreamWriteFunction extends StreamWriteFunction { private static final Logger LOG = LoggerFactory.getLogger(BucketStreamWriteFunction.class); - private int maxParallelism; - private int parallelism; private int bucketNum; - private transient HoodieFlinkTable table; - private String indexKeyFields; /** - * BucketID to file group mapping. + * BucketID should be loaded in this task. + */ + private Set bucketToLoad; + + /** + * BucketID to file group mapping in each partition. + * Map(partition -> Map(bucketId, fileID)). */ - private HashMap bucketIndex; + private Map> bucketIndex; /** * Incremental bucket index of the current checkpoint interval, * it is needed because the bucket type('I' or 'U') should be decided based on the committed files view, * all the records in one bucket should have the same bucket type. */ - private HashMap incBucketIndex; + private Set incBucketIndex; + + /** + * Returns whether this is an empty table. + */ + private boolean isEmptyTable; /** * Constructs a BucketStreamWriteFunction. 
@@ -93,22 +96,20 @@ public void open(Configuration parameters) throws IOException { this.indexKeyFields = config.getString(FlinkOptions.INDEX_KEY_FIELD); this.taskID = getRuntimeContext().getIndexOfThisSubtask(); this.parallelism = getRuntimeContext().getNumberOfParallelSubtasks(); - this.maxParallelism = getRuntimeContext().getMaxNumberOfParallelSubtasks(); + this.bucketToLoad = getBucketToLoad(); this.bucketIndex = new HashMap<>(); - this.incBucketIndex = new HashMap<>(); - bootstrapIndex(); + this.incBucketIndex = new HashSet<>(); + this.isEmptyTable = !this.metaClient.getActiveTimeline().filterCompletedInstants().lastInstant().isPresent(); } @Override public void initializeState(FunctionInitializationContext context) throws Exception { super.initializeState(context); - this.table = this.writeClient.getHoodieTable(); } @Override public void snapshotState() { super.snapshotState(); - this.bucketIndex.putAll(this.incBucketIndex); this.incBucketIndex.clear(); } @@ -116,17 +117,23 @@ public void snapshotState() { public void processElement(I i, ProcessFunction.Context context, Collector collector) throws Exception { HoodieRecord record = (HoodieRecord) i; final HoodieKey hoodieKey = record.getKey(); + final String partition = hoodieKey.getPartitionPath(); final HoodieRecordLocation location; + bootstrapIndexIfNeed(partition); + Map bucketToFileId = bucketIndex.computeIfAbsent(partition, p -> new HashMap<>()); final int bucketNum = BucketIdentifier.getBucketId(hoodieKey, indexKeyFields, this.bucketNum); - final String partitionBucketId = BucketIdentifier.partitionBucketIdStr(hoodieKey.getPartitionPath(), bucketNum); + final String bucketId = partition + bucketNum; - if (bucketIndex.containsKey(partitionBucketId)) { - location = new HoodieRecordLocation("U", bucketIndex.get(partitionBucketId)); + if (incBucketIndex.contains(bucketId)) { + location = new HoodieRecordLocation("I", bucketToFileId.get(bucketNum)); + } else if (bucketToFileId.containsKey(bucketNum)) { + 
location = new HoodieRecordLocation("U", bucketToFileId.get(bucketNum)); } else { String newFileId = BucketIdentifier.newBucketFileIdPrefix(bucketNum); location = new HoodieRecordLocation("I", newFileId); - incBucketIndex.put(partitionBucketId, newFileId); + bucketToFileId.put(bucketNum, newFileId); + incBucketIndex.add(bucketId); } record.unseal(); record.setCurrentLocation(location); @@ -135,52 +142,48 @@ public void processElement(I i, ProcessFunction.Context context, Coll } /** - * Get partition_bucket -> fileID mapping from the existing hudi table. - * This is a required operation for each restart to avoid having duplicate file ids for one bucket. + * Bootstrap bucket info from existing file system, + * bucketNum % totalParallelism == this taskID belongs to this task. */ - private void bootstrapIndex() throws IOException { - Option latestCommitTime = table.getFileSystemView().getTimeline().filterCompletedInstants().lastInstant(); - if (!latestCommitTime.isPresent()) { - return; - } - // bootstrap bucket info from existing file system - // bucketNum % totalParallelism == this taskID belongs to this task - HashSet bucketToLoad = new HashSet<>(); + private Set getBucketToLoad() { + Set bucketToLoad = new HashSet<>(); for (int i = 0; i < bucketNum; i++) { int partitionOfBucket = BucketIdentifier.mod(i, parallelism); if (partitionOfBucket == taskID) { - LOG.info(String.format("Bootstrapping index. 
Adding bucket %s , " - + "Current parallelism: %s , Max parallelism: %s , Current task id: %s", - i, parallelism, maxParallelism, taskID)); bucketToLoad.add(i); } } - bucketToLoad.forEach(bucket -> LOG.info(String.format("bucketToLoad contains %s", bucket))); - - LOG.info(String.format("Loading Hoodie Table %s, with path %s", table.getMetaClient().getTableConfig().getTableName(), - table.getMetaClient().getBasePath())); - - // Iterate through all existing partitions to load existing fileID belongs to this task - List partitions = table.getMetadata().getAllPartitionPaths(); - for (String partitionPath : partitions) { - List latestFileSlices = table.getSliceView() - .getLatestFileSlices(partitionPath) - .collect(toList()); - for (FileSlice fileslice : latestFileSlices) { - String fileID = fileslice.getFileId(); - int bucketNumber = BucketIdentifier.bucketIdFromFileId(fileID); - if (bucketToLoad.contains(bucketNumber)) { - String partitionBucketId = BucketIdentifier.partitionBucketIdStr(partitionPath, bucketNumber); - LOG.info(String.format("Should load this partition bucket %s with fileID %s", partitionBucketId, fileID)); - if (bucketIndex.containsKey(partitionBucketId)) { - throw new RuntimeException(String.format("Duplicate fileID %s from partitionBucket %s found " - + "during the BucketStreamWriteFunction index bootstrap.", fileID, partitionBucketId)); - } else { - LOG.info(String.format("Adding fileID %s to the partition bucket %s.", fileID, partitionBucketId)); - bucketIndex.put(partitionBucketId, fileID); - } + LOG.info("Bucket number that belongs to task [{}/{}]: {}", taskID, parallelism, bucketToLoad); + return bucketToLoad; + } + + /** + * Get partition_bucket -> fileID mapping from the existing hudi table. + * This is a required operation for each restart to avoid having duplicate file ids for one bucket. 
+ */ + private void bootstrapIndexIfNeed(String partition) { + if (isEmptyTable || bucketIndex.containsKey(partition)) { + return; + } + LOG.info(String.format("Loading Hoodie Table %s, with path %s", this.metaClient.getTableConfig().getTableName(), + this.metaClient.getBasePath() + "/" + partition)); + + // Load existing fileID belongs to this task + Map bucketToFileIDMap = new HashMap<>(); + this.writeClient.getHoodieTable().getHoodieView().getLatestFileSlices(partition).forEach(fileSlice -> { + String fileID = fileSlice.getFileId(); + int bucketNumber = BucketIdentifier.bucketIdFromFileId(fileID); + if (bucketToLoad.contains(bucketNumber)) { + LOG.info(String.format("Should load this partition bucket %s with fileID %s", bucketNumber, fileID)); + if (bucketToFileIDMap.containsKey(bucketNumber)) { + throw new RuntimeException(String.format("Duplicate fileID %s from bucket %s of partition %s found " + + "during the BucketStreamWriteFunction index bootstrap.", fileID, bucketNumber, partition)); + } else { + LOG.info(String.format("Adding fileID %s to the bucket %s of partition %s.", fileID, bucketNumber, partition)); + bucketToFileIDMap.put(bucketNumber, fileID); } } - } + }); + bucketIndex.put(partition, bucketToFileIDMap); } } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bucket/BucketStreamWriteOperator.java similarity index 97% rename from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java rename to hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bucket/BucketStreamWriteOperator.java index cf740cc2ccc59..a48ea44ddc44a 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bucket/BucketStreamWriteOperator.java @@ -16,7 +16,7 @@ * 
limitations under the License. */ -package org.apache.hudi.sink; +package org.apache.hudi.sink.bucket; import org.apache.hudi.sink.common.AbstractWriteOperator; import org.apache.hudi.sink.common.WriteOperatorFactory; diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteFunction.java index 9b34c3edcd800..6c8dcef0f3925 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteFunction.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriteFunction.java @@ -167,7 +167,7 @@ public void setOperatorEventGateway(OperatorEventGateway operatorEventGateway) { private void initWriterHelper() { String instant = instantToWrite(); - this.writerHelper = new BulkInsertWriterHelper(this.config, this.writeClient.getHoodieTable(), this.writeClient.getConfig(), + this.writerHelper = WriterHelpers.getWriterHelper(this.config, this.writeClient.getHoodieTable(), this.writeClient.getConfig(), instant, this.taskID, getRuntimeContext().getNumberOfParallelSubtasks(), getRuntimeContext().getAttemptNumber(), this.rowType); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriterHelper.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriterHelper.java index 4bc8ae27fb5d5..4e1d189b5510f 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriterHelper.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/BulkInsertWriterHelper.java @@ -50,21 +50,21 @@ public class BulkInsertWriterHelper { private static final Logger LOG = LogManager.getLogger(BulkInsertWriterHelper.class); - private final String instantTime; - private final int taskPartitionId; - private final long taskId; - private final long 
taskEpochId; - private final HoodieTable hoodieTable; - private final HoodieWriteConfig writeConfig; - private final RowType rowType; - private final Boolean arePartitionRecordsSorted; + protected final String instantTime; + protected final int taskPartitionId; + protected final long taskId; + protected final long taskEpochId; + protected final HoodieTable hoodieTable; + protected final HoodieWriteConfig writeConfig; + protected final RowType rowType; + protected final Boolean isInputSorted; private final List writeStatusList = new ArrayList<>(); - private HoodieRowDataCreateHandle handle; + protected HoodieRowDataCreateHandle handle; private String lastKnownPartitionPath = null; private final String fileIdPrefix; private int numFilesWritten = 0; - private final Map handles = new HashMap<>(); - private final RowDataKeyGen keyGen; + protected final Map handles = new HashMap<>(); + protected final RowDataKeyGen keyGen; public BulkInsertWriterHelper(Configuration conf, HoodieTable hoodieTable, HoodieWriteConfig writeConfig, String instantTime, int taskPartitionId, long taskId, long taskEpochId, RowType rowType) { @@ -75,7 +75,7 @@ public BulkInsertWriterHelper(Configuration conf, HoodieTable hoodieTable, Hoodi this.taskId = taskId; this.taskEpochId = taskEpochId; this.rowType = addMetadataFields(rowType, writeConfig.allowOperationMetadataField()); // patch up with metadata fields - this.arePartitionRecordsSorted = conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_SORT_BY_PARTITION); + this.isInputSorted = conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_SORT_INPUT); this.fileIdPrefix = UUID.randomUUID().toString(); this.keyGen = RowDataKeyGen.instance(conf, rowType); } @@ -112,7 +112,7 @@ public List getHoodieWriteStatuses() throws IOExcepti private HoodieRowDataCreateHandle getRowCreateHandle(String partitionPath) throws IOException { if (!handles.containsKey(partitionPath)) { // if there is no handle corresponding to the partition path // if records are sorted, we can 
close all existing handles - if (arePartitionRecordsSorted) { + if (isInputSorted) { close(); } HoodieRowDataCreateHandle rowCreateHandle = new HoodieRowDataCreateHandle(hoodieTable, writeConfig, partitionPath, getNextFileId(), diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/WriterHelpers.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/WriterHelpers.java new file mode 100644 index 0000000000000..99a9ae114cd8e --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/WriterHelpers.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.sink.bulk; + +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.configuration.OptionsResolver; +import org.apache.hudi.sink.bucket.BucketBulkInsertWriterHelper; +import org.apache.hudi.table.HoodieTable; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.types.logical.RowType; + +/** + * Factory clazz to generate bulk insert writer helpers. 
+ */ +public class WriterHelpers { + public static BulkInsertWriterHelper getWriterHelper(Configuration conf, HoodieTable hoodieTable, HoodieWriteConfig writeConfig, + String instantTime, int taskPartitionId, long taskId, long taskEpochId, RowType rowType) { + return OptionsResolver.isBucketIndexType(conf) + ? new BucketBulkInsertWriterHelper(conf, hoodieTable, writeConfig, instantTime, taskPartitionId, taskId, taskEpochId, rowType) + : new BulkInsertWriterHelper(conf, hoodieTable, writeConfig, instantTime, taskPartitionId, taskId, taskEpochId, rowType); + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractStreamWriteFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractStreamWriteFunction.java index f9cf938e44aee..4e8712b6619f2 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractStreamWriteFunction.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractStreamWriteFunction.java @@ -182,6 +182,8 @@ private void restoreWriteMetadata() throws Exception { boolean eventSent = false; for (WriteMetadataEvent event : this.writeMetadataState.get()) { if (Objects.equals(lastInflight, event.getInstantTime())) { + // Reset taskID for event + event.setTaskID(taskID); // The checkpoint succeed but the meta does not commit, // re-commit the inflight instant this.eventGateway.sendEventToCoordinator(event); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketAssignFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketAssignFunction.java index d01db962c9ba4..c4b83bf51aace 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketAssignFunction.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketAssignFunction.java @@ -188,7 +188,6 
@@ private void processRecord(HoodieRecord record, Collector out) throws Exce out.collect((O) deleteRecord); } location = getNewRecordLocation(partitionPath); - updateIndexState(partitionPath, location); } else { location = oldLoc.toLocal("U"); this.bucketAssigner.addUpdate(partitionPath, location.getFileId()); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketIndexPartitioner.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketIndexPartitioner.java index 0c4e2a129da7a..b9b737ce22857 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketIndexPartitioner.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketIndexPartitioner.java @@ -18,7 +18,6 @@ package org.apache.hudi.sink.partitioner; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.index.bucket.BucketIdentifier; import org.apache.flink.api.common.functions.Partitioner; @@ -29,7 +28,7 @@ * * @param The type of obj to hash */ -public class BucketIndexPartitioner implements Partitioner { +public class BucketIndexPartitioner implements Partitioner { private final int bucketNum; private final String indexKeyFields; @@ -40,7 +39,7 @@ public BucketIndexPartitioner(int bucketNum, String indexKeyFields) { } @Override - public int partition(HoodieKey key, int numPartitions) { + public int partition(String key, int numPartitions) { int curBucket = BucketIdentifier.getBucketId(key, indexKeyFields, bucketNum); return BucketIdentifier.mod(curBucket, numPartitions); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java index 768d36e0abe4b..52ffa85659161 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java +++ 
b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java @@ -18,10 +18,12 @@ package org.apache.hudi.sink.utils; +import org.apache.hudi.aws.sync.AwsGlueCatalogSyncTool; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.HiveSyncTool; +import org.apache.hudi.hive.ddl.HiveSyncMode; import org.apache.hudi.table.format.FilePathUtils; import org.apache.hudi.util.StreamerUtil; @@ -48,6 +50,10 @@ private HiveSyncContext(HiveSyncConfig syncConfig, HiveConf hiveConf, FileSystem } public HiveSyncTool hiveSyncTool() { + HiveSyncMode syncMode = HiveSyncMode.of(syncConfig.syncMode); + if (syncMode == HiveSyncMode.GLUE) { + return new AwsGlueCatalogSyncTool(this.syncConfig, this.hiveConf, this.fs); + } return new HiveSyncTool(this.syncConfig, this.hiveConf, this.fs); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java index 65d67fe95227a..28a669075da3c 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java @@ -18,16 +18,16 @@ package org.apache.hudi.sink.utils; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.OptionsResolver; -import org.apache.hudi.sink.BucketStreamWriteOperator; import org.apache.hudi.sink.CleanFunction; import org.apache.hudi.sink.StreamWriteOperator; import org.apache.hudi.sink.append.AppendWriteOperator; import org.apache.hudi.sink.bootstrap.BootstrapOperator; import org.apache.hudi.sink.bootstrap.batch.BatchBootstrapOperator; +import org.apache.hudi.sink.bucket.BucketBulkInsertWriterHelper; 
+import org.apache.hudi.sink.bucket.BucketStreamWriteOperator; import org.apache.hudi.sink.bulk.BulkInsertWriteOperator; import org.apache.hudi.sink.bulk.RowDataKeyGen; import org.apache.hudi.sink.bulk.sort.SortOperatorGen; @@ -54,8 +54,12 @@ import org.apache.flink.streaming.api.operators.ProcessOperator; import org.apache.flink.table.data.RowData; import org.apache.flink.table.planner.plan.nodes.exec.utils.ExecNodeUtil; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; import org.apache.flink.table.types.logical.RowType; +import java.util.HashMap; +import java.util.Map; + /** * Utilities to generate all kinds of sub-pipelines. */ @@ -88,11 +92,38 @@ public class Pipelines { */ public static DataStreamSink bulkInsert(Configuration conf, RowType rowType, DataStream dataStream) { WriteOperatorFactory operatorFactory = BulkInsertWriteOperator.getFactory(conf, rowType); + if (OptionsResolver.isBucketIndexType(conf)) { + String indexKeys = conf.getString(FlinkOptions.INDEX_KEY_FIELD); + int numBuckets = conf.getInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS); + + BucketIndexPartitioner partitioner = new BucketIndexPartitioner<>(numBuckets, indexKeys); + RowDataKeyGen keyGen = RowDataKeyGen.instance(conf, rowType); + RowType rowTypeWithFileId = BucketBulkInsertWriterHelper.rowTypeWithFileId(rowType); + InternalTypeInfo typeInfo = InternalTypeInfo.of(rowTypeWithFileId); + + Map bucketIdToFileId = new HashMap<>(); + dataStream = dataStream.partitionCustom(partitioner, keyGen::getRecordKey) + .map(record -> BucketBulkInsertWriterHelper.rowWithFileId(bucketIdToFileId, keyGen, record, indexKeys, numBuckets), typeInfo) + .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS)); // same parallelism as write task to avoid shuffle + if (conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_SORT_INPUT)) { + SortOperatorGen sortOperatorGen = BucketBulkInsertWriterHelper.getFileIdSorterGen(rowTypeWithFileId); + dataStream = dataStream.transform("file_sorter", typeInfo, 
sortOperatorGen.createSortOperator()) + .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS)); // same parallelism as write task to avoid shuffle + ExecNodeUtil.setManagedMemoryWeight(dataStream.getTransformation(), + conf.getInteger(FlinkOptions.WRITE_SORT_MEMORY) * 1024L * 1024L); + } + return dataStream + .transform("bucket_bulk_insert", TypeInformation.of(Object.class), operatorFactory) + .uid("uid_bucket_bulk_insert" + conf.getString(FlinkOptions.TABLE_NAME)) + .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS)) + .addSink(DummySink.INSTANCE) + .name("dummy"); + } final String[] partitionFields = FilePathUtils.extractPartitionKeys(conf); if (partitionFields.length > 0) { RowDataKeyGen rowDataKeyGen = RowDataKeyGen.instance(conf, rowType); - if (conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_SHUFFLE_BY_PARTITION)) { + if (conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_SHUFFLE_INPUT)) { // shuffle by partition keys // use #partitionCustom instead of #keyBy to avoid duplicate sort operations, @@ -101,7 +132,7 @@ public static DataStreamSink bulkInsert(Configuration conf, RowType rowT KeyGroupRangeAssignment.assignKeyToParallelOperator(key, StreamGraphGenerator.DEFAULT_LOWER_BOUND_MAX_PARALLELISM, channels); dataStream = dataStream.partitionCustom(partitioner, rowDataKeyGen::getPartitionPath); } - if (conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_SORT_BY_PARTITION)) { + if (conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_SORT_INPUT)) { SortOperatorGen sortOperatorGen = new SortOperatorGen(rowType, partitionFields); // sort by partition keys dataStream = dataStream @@ -278,8 +309,8 @@ public static DataStream hoodieStreamWrite(Configuration conf, int defau WriteOperatorFactory operatorFactory = BucketStreamWriteOperator.getFactory(conf); int bucketNum = conf.getInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS); String indexKeyFields = conf.getString(FlinkOptions.INDEX_KEY_FIELD); - BucketIndexPartitioner partitioner = new 
BucketIndexPartitioner<>(bucketNum, indexKeyFields); - return dataStream.partitionCustom(partitioner, HoodieRecord::getKey) + BucketIndexPartitioner partitioner = new BucketIndexPartitioner<>(bucketNum, indexKeyFields); + return dataStream.partitionCustom(partitioner, HoodieRecord::getRecordKey) .transform("bucket_write", TypeInformation.of(Object.class), operatorFactory) .uid("uid_bucket_write" + conf.getString(FlinkOptions.TABLE_NAME)) .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS)); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java index c5d73036eda60..592520bf902f8 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java @@ -18,8 +18,6 @@ package org.apache.hudi.streamer; -import org.apache.flink.runtime.state.StateBackend; -import org.apache.flink.runtime.state.hashmap.HashMapStateBackend; import org.apache.hudi.client.utils.OperationConverter; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.model.WriteOperationType; @@ -32,6 +30,8 @@ import com.beust.jcommander.Parameter; import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.state.StateBackend; +import org.apache.flink.runtime.state.hashmap.HashMapStateBackend; import java.util.ArrayList; import java.util.HashMap; diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java index bbbc67985c8af..ed99e7b4c1c3d 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java +++ 
b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java @@ -73,7 +73,7 @@ public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { // bulk_insert mode final String writeOperation = this.conf.get(FlinkOptions.OPERATION); if (WriteOperationType.fromValue(writeOperation) == WriteOperationType.BULK_INSERT) { - return context.isBounded() ? Pipelines.bulkInsert(conf, rowType, dataStream) : Pipelines.append(conf, rowType, dataStream); + return Pipelines.bulkInsert(conf, rowType, dataStream); } // Append mode diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java index fce9b75f764ea..f01993edc61c4 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java @@ -18,7 +18,6 @@ package org.apache.hudi.table.format; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieOperation; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner; @@ -43,6 +42,7 @@ import org.apache.flink.types.RowKind; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import java.util.ArrayList; import java.util.Arrays; @@ -52,6 +52,10 @@ import java.util.Map; import java.util.function.Function; +import static org.apache.hudi.common.fs.FSUtils.getFs; +import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath; +import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty; + /** * Utilities for format. 
*/ @@ -124,11 +128,13 @@ public static HoodieMergedLogRecordScanner logScanner( Schema logSchema, Configuration config, boolean withOperationField) { - FileSystem fs = FSUtils.getFs(split.getTablePath(), config); - return HoodieMergedLogRecordScanner.newBuilder() + String basePath = split.getTablePath(); + List logPaths = split.getLogPaths().get(); + FileSystem fs = getFs(basePath, config); + HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder() .withFileSystem(fs) - .withBasePath(split.getTablePath()) - .withLogFilePaths(split.getLogPaths().get()) + .withBasePath(basePath) + .withLogFilePaths(logPaths) .withReaderSchema(logSchema) .withLatestInstantTime(split.getLatestCommit()) .withReadBlocksLazily( @@ -144,8 +150,12 @@ public static HoodieMergedLogRecordScanner logScanner( config.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH)) .withInstantRange(split.getInstantRange()) - .withOperationField(withOperationField) - .build(); + .withOperationField(withOperationField); + if (!isNullOrEmpty(logPaths)) { + logRecordScannerBuilder + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(logPaths.get(0)).getParent())); + } + return logRecordScannerBuilder.build(); } private static HoodieUnMergedLogRecordScanner unMergedLogScanner( @@ -153,7 +163,7 @@ private static HoodieUnMergedLogRecordScanner unMergedLogScanner( Schema logSchema, Configuration config, HoodieUnMergedLogRecordScanner.LogRecordScannerCallback callback) { - FileSystem fs = FSUtils.getFs(split.getTablePath(), config); + FileSystem fs = getFs(split.getTablePath(), config); return HoodieUnMergedLogRecordScanner.newBuilder() .withFileSystem(fs) .withBasePath(split.getTablePath()) @@ -234,8 +244,8 @@ public static HoodieMergedLogRecordScanner logScanner( HoodieWriteConfig writeConfig, Configuration hadoopConf) { String basePath = writeConfig.getBasePath(); - return 
HoodieMergedLogRecordScanner.newBuilder() - .withFileSystem(FSUtils.getFs(basePath, hadoopConf)) + HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder() + .withFileSystem(getFs(basePath, hadoopConf)) .withBasePath(basePath) .withLogFilePaths(logPaths) .withReaderSchema(logSchema) @@ -246,8 +256,12 @@ public static HoodieMergedLogRecordScanner logScanner( .withMaxMemorySizeInBytes(writeConfig.getMaxMemoryPerPartitionMerge()) .withSpillableMapBasePath(writeConfig.getSpillableMapBasePath()) .withDiskMapType(writeConfig.getCommonConfig().getSpillableDiskMapType()) - .withBitCaskDiskMapCompressionEnabled(writeConfig.getCommonConfig().isBitCaskDiskMapCompressionEnabled()) - .build(); + .withBitCaskDiskMapCompressionEnabled(writeConfig.getCommonConfig().isBitCaskDiskMapCompressionEnabled()); + if (!isNullOrEmpty(logPaths)) { + logRecordScannerBuilder + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(logPaths.get(0)).getParent())); + } + return logRecordScannerBuilder.build(); } private static Boolean string2Boolean(String s) { diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java index 6325c2bcceb4e..6e87ff1fb340c 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java @@ -245,10 +245,13 @@ public static Schema convertToSchema(LogicalType logicalType, String rowName) { return nullable ? nullableSchema(time) : time; case DECIMAL: DecimalType decimalType = (DecimalType) logicalType; - // store BigDecimal as byte[] + // store BigDecimal as Fixed + // for spark compatibility. 
Schema decimal = LogicalTypes.decimal(decimalType.getPrecision(), decimalType.getScale()) - .addToSchema(SchemaBuilder.builder().bytesType()); + .addToSchema(SchemaBuilder + .fixed(String.format("%s.fixed", rowName)) + .size(computeMinBytesForDecimlPrecision(decimalType.getPrecision()))); return nullable ? nullableSchema(decimal) : decimal; case ROW: RowType rowType = (RowType) logicalType; @@ -324,5 +327,13 @@ private static Schema nullableSchema(Schema schema) { ? schema : Schema.createUnion(SchemaBuilder.builder().nullType(), schema); } + + private static int computeMinBytesForDecimlPrecision(int precision) { + int numBytes = 1; + while (Math.pow(2.0, 8 * numBytes - 1) < Math.pow(10.0, precision)) { + numBytes += 1; + } + return numBytes; + } } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/FlinkStateBackendConverter.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/FlinkStateBackendConverter.java index b46ab14e46384..a6b15ffb74e32 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/FlinkStateBackendConverter.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/FlinkStateBackendConverter.java @@ -18,12 +18,13 @@ package org.apache.hudi.util; +import org.apache.hudi.exception.HoodieException; + import com.beust.jcommander.IStringConverter; import com.beust.jcommander.ParameterException; import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend; import org.apache.flink.runtime.state.StateBackend; import org.apache.flink.runtime.state.hashmap.HashMapStateBackend; -import org.apache.hudi.exception.HoodieException; /** * Converter that converts a string into Flink StateBackend. 
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java index d90670ff452f8..446a6d04178ce 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataToAvroConverters.java @@ -18,6 +18,7 @@ package org.apache.hudi.util; +import org.apache.avro.Conversions; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; @@ -34,6 +35,7 @@ import org.apache.flink.table.types.logical.TimestampType; import java.io.Serializable; +import java.math.BigDecimal; import java.nio.ByteBuffer; import java.time.Instant; import java.time.temporal.ChronoUnit; @@ -50,6 +52,8 @@ @Internal public class RowDataToAvroConverters { + private static Conversions.DecimalConversion decimalConversion = new Conversions.DecimalConversion(); + // -------------------------------------------------------------------------------- // Runtime Converters // -------------------------------------------------------------------------------- @@ -186,7 +190,8 @@ public Object convert(Schema schema, Object object) { @Override public Object convert(Schema schema, Object object) { - return ByteBuffer.wrap(((DecimalData) object).toUnscaledBytes()); + BigDecimal javaDecimal = ((DecimalData) object).toBigDecimal(); + return decimalConversion.toFixed(javaDecimal, schema, schema.getLogicalType()); } }; break; diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java index 814a8f19e1596..7a8aeff97b560 100644 --- 
a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java @@ -207,8 +207,8 @@ void testSyncMetadataTable() throws Exception { assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP)); // test metadata table compaction - // write another 3 commits - for (int i = 1; i < 4; i++) { + // write another 4 commits + for (int i = 1; i < 5; i++) { instant = mockWriteWithMetadata(); metadataTableMetaClient.reloadActiveTimeline(); completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedInstants(); @@ -216,14 +216,14 @@ void testSyncMetadataTable() throws Exception { assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant)); } // the 5th commit triggers the compaction - instant = mockWriteWithMetadata(); + mockWriteWithMetadata(); metadataTableMetaClient.reloadActiveTimeline(); completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedAndCompactionInstants(); - assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(6L)); - assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant + "001")); - assertThat(completedTimeline.lastInstant().get().getAction(), is(HoodieTimeline.COMMIT_ACTION)); + assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(7L)); + assertThat(completedTimeline.nthFromLastInstant(1).get().getTimestamp(), is(instant + "001")); + assertThat(completedTimeline.nthFromLastInstant(1).get().getAction(), is(HoodieTimeline.COMMIT_ACTION)); // write another 2 commits - for (int i = 6; i < 8; i++) { + for (int i = 7; i < 8; i++) { instant = mockWriteWithMetadata(); metadataTableMetaClient.reloadActiveTimeline(); completedTimeline = 
metadataTableMetaClient.getActiveTimeline().filterCompletedInstants(); @@ -241,13 +241,15 @@ void testSyncMetadataTable() throws Exception { // write another commit mockWriteWithMetadata(); - // write another commit to trigger compaction + // write another commit instant = mockWriteWithMetadata(); + // write another commit to trigger compaction + mockWriteWithMetadata(); metadataTableMetaClient.reloadActiveTimeline(); completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedAndCompactionInstants(); - assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(13L)); - assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant + "001")); - assertThat(completedTimeline.lastInstant().get().getAction(), is(HoodieTimeline.COMMIT_ACTION)); + assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(14L)); + assertThat(completedTimeline.nthFromLastInstant(1).get().getTimestamp(), is(instant + "001")); + assertThat(completedTimeline.nthFromLastInstant(1).get().getAction(), is(HoodieTimeline.COMMIT_ACTION)); } // ------------------------------------------------------------------------- diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java index 72c0890bbf649..786a45cac7ac9 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java @@ -906,8 +906,8 @@ void testWriteAndReadDebeziumJson(ExecMode execMode) throws Exception { } @ParameterizedTest - @ValueSource(booleans = {true, false}) - void testBulkInsert(boolean hiveStylePartitioning) { + @MethodSource("indexAndPartitioningParams") + void testBulkInsert(String indexType, boolean hiveStylePartitioning) 
{ TableEnvironment tableEnv = batchTableEnv; // csv source String csvSourceDDL = TestConfigurations.getCsvSourceDDL("csv_source", "test_source_5.data"); @@ -916,7 +916,8 @@ void testBulkInsert(boolean hiveStylePartitioning) { String hoodieTableDDL = sql("hoodie_sink") .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.OPERATION, "bulk_insert") - .option(FlinkOptions.WRITE_BULK_INSERT_SHUFFLE_BY_PARTITION, true) + .option(FlinkOptions.WRITE_BULK_INSERT_SHUFFLE_INPUT, true) + .option(FlinkOptions.INDEX_TYPE, indexType) .option(FlinkOptions.HIVE_STYLE_PARTITIONING, hiveStylePartitioning) .end(); tableEnv.executeSql(hoodieTableDDL); @@ -1178,7 +1179,7 @@ void testParquetComplexNestedRowTypes(String operation) { @ParameterizedTest @ValueSource(strings = {"insert", "upsert", "bulk_insert"}) void testBuiltinFunctionWithCatalog(String operation) { - TableEnvironment tableEnv = streamTableEnv; + TableEnvironment tableEnv = batchTableEnv; String hudiCatalogDDL = catalog("hudi_" + operation) .catalogPath(tempFile.getAbsolutePath()) @@ -1262,6 +1263,19 @@ private static Stream tableTypeAndPartitioningParams() { return Stream.of(data).map(Arguments::of); } + /** + * Return test params => (index type, hive style partitioning). 
+ */ + private static Stream indexAndPartitioningParams() { + Object[][] data = + new Object[][] { + {"FLINK_STATE", false}, + {"FLINK_STATE", true}, + {"BUCKET", false}, + {"BUCKET", true}}; + return Stream.of(data).map(Arguments::of); + } + private void execInsertSql(TableEnvironment tEnv, String insert) { TableResult tableResult = tEnv.executeSql(insert); // wait to finish diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java index c1e924056cfa2..f2439b4471d3c 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java @@ -67,6 +67,8 @@ import java.util.stream.IntStream; import static junit.framework.TestCase.assertEquals; +import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath; +import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -661,7 +663,7 @@ private static HoodieMergedLogRecordScanner getScanner( List logPaths, Schema readSchema, String instant) { - return HoodieMergedLogRecordScanner.newBuilder() + HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder() .withFileSystem(fs) .withBasePath(basePath) .withLogFilePaths(logPaths) @@ -673,8 +675,12 @@ private static HoodieMergedLogRecordScanner getScanner( .withMaxMemorySizeInBytes(1024 * 1024L) .withSpillableMapBasePath("/tmp/") .withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue()) - .withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue()) - .build(); + 
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue()); + if (!isNullOrEmpty(logPaths)) { + logRecordScannerBuilder + .withPartition(getRelativePartitionPath(new Path(basePath), new Path(logPaths.get(0)).getParent())); + } + return logRecordScannerBuilder.build(); } /** diff --git a/hudi-flink-datasource/hudi-flink1.13.x/pom.xml b/hudi-flink-datasource/hudi-flink1.13.x/pom.xml index f6e4f5bc88dfe..68a4d89829d78 100644 --- a/hudi-flink-datasource/hudi-flink1.13.x/pom.xml +++ b/hudi-flink-datasource/hudi-flink1.13.x/pom.xml @@ -20,12 +20,12 @@ hudi-flink-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-flink1.13.x - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT jar diff --git a/hudi-flink-datasource/hudi-flink1.14.x/pom.xml b/hudi-flink-datasource/hudi-flink1.14.x/pom.xml index 0e5df91b49c90..186d8bd3c2da8 100644 --- a/hudi-flink-datasource/hudi-flink1.14.x/pom.xml +++ b/hudi-flink-datasource/hudi-flink1.14.x/pom.xml @@ -20,12 +20,12 @@ hudi-flink-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-flink1.14.x - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT jar diff --git a/hudi-flink-datasource/pom.xml b/hudi-flink-datasource/pom.xml index 1ce7735b9844a..c33e4280ee8db 100644 --- a/hudi-flink-datasource/pom.xml +++ b/hudi-flink-datasource/pom.xml @@ -20,12 +20,12 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-flink-datasource - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT pom diff --git a/hudi-gcp/pom.xml b/hudi-gcp/pom.xml new file mode 100644 index 0000000000000..da4046b1611e3 --- /dev/null +++ b/hudi-gcp/pom.xml @@ -0,0 +1,117 @@ + + + + + hudi + org.apache.hudi + 0.12.0-SNAPSHOT + ../pom.xml + + + 4.0.0 + + hudi-gcp + jar + + + + + + com.google.cloud + libraries-bom + 25.1.0 + pom + import + + + + + + + + org.apache.hudi + hudi-common + ${project.version} + + + org.apache.hudi + hudi-sync-common + ${project.version} + + + + com.google.cloud + google-cloud-bigquery + + + + + 
log4j + log4j + + + + org.apache.parquet + parquet-avro + + + + + org.apache.hadoop + hadoop-common + + + + org.junit.jupiter + junit-jupiter-api + test + + + + + + + src/main/resources + + + + + org.apache.rat + apache-rat-plugin + + + org.apache.maven.plugins + maven-jar-plugin + ${maven-jar-plugin.version} + + + + test-jar + + + + + + org.jacoco + jacoco-maven-plugin + + + + diff --git a/hudi-gcp/src/assembly/src.xml b/hudi-gcp/src/assembly/src.xml new file mode 100644 index 0000000000000..646e94c1a00b6 --- /dev/null +++ b/hudi-gcp/src/assembly/src.xml @@ -0,0 +1,46 @@ + + + + jar-with-dependencies + + jar + + + false + + + + / + true + runtime + + junit:junit + com.google.code.findbugs:* + org.apache.hbase:* + + + + + true + provided + + + diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java new file mode 100644 index 0000000000000..6aa9bc0b535a0 --- /dev/null +++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.gcp.bigquery; + +import org.apache.hudi.common.config.TypedProperties; + +import com.beust.jcommander.Parameter; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Configs needed to sync data into BigQuery. + */ +public class BigQuerySyncConfig implements Serializable { + + public static String BIGQUERY_SYNC_PROJECT_ID = "hoodie.gcp.bigquery.sync.project_id"; + public static String BIGQUERY_SYNC_DATASET_NAME = "hoodie.gcp.bigquery.sync.dataset_name"; + public static String BIGQUERY_SYNC_DATASET_LOCATION = "hoodie.gcp.bigquery.sync.dataset_location"; + public static String BIGQUERY_SYNC_TABLE_NAME = "hoodie.gcp.bigquery.sync.table_name"; + public static String BIGQUERY_SYNC_SOURCE_URI = "hoodie.gcp.bigquery.sync.source_uri"; + public static String BIGQUERY_SYNC_SOURCE_URI_PREFIX = "hoodie.gcp.bigquery.sync.source_uri_prefix"; + public static String BIGQUERY_SYNC_SYNC_BASE_PATH = "hoodie.gcp.bigquery.sync.base_path"; + public static String BIGQUERY_SYNC_PARTITION_FIELDS = "hoodie.gcp.bigquery.sync.partition_fields"; + public static String BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA = "hoodie.gcp.bigquery.sync.use_file_listing_from_metadata"; + public static String BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING = "hoodie.gcp.bigquery.sync.assume_date_partitioning"; + + @Parameter(names = {"--project-id"}, description = "name of the target project in BigQuery", required = true) + public String projectId; + @Parameter(names = {"--dataset-name"}, description = "name of the target dataset in BigQuery", required = true) + public String datasetName; + @Parameter(names = {"--dataset-location"}, description = "location of the target dataset in BigQuery", required = true) + public String datasetLocation; + @Parameter(names = {"--table-name"}, description = "name of the target table in BigQuery", required = true) + public String tableName; + @Parameter(names = {"--source-uri"}, 
description = "name of the source uri gcs path of the table", required = true) + public String sourceUri; + @Parameter(names = {"--source-uri-prefix"}, description = "name of the source uri gcs path prefix of the table", required = true) + public String sourceUriPrefix; + @Parameter(names = {"--base-path"}, description = "Base path of the hoodie table to sync", required = true) + public String basePath; + @Parameter(names = {"--partitioned-by"}, description = "Comma-delimited partition fields. Default to non-partitioned.") + public List partitionFields = new ArrayList<>(); + @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata") + public Boolean useFileListingFromMetadata = false; + @Parameter(names = {"--assume-date-partitioning"}, description = "Assume standard yyyy/mm/dd partitioning, this" + + " exists to support backward compatibility. If you use hoodie 0.3.x, do not set this parameter") + public Boolean assumeDatePartitioning = false; + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + + public static BigQuerySyncConfig copy(BigQuerySyncConfig cfg) { + BigQuerySyncConfig newConfig = new BigQuerySyncConfig(); + newConfig.projectId = cfg.projectId; + newConfig.datasetName = cfg.datasetName; + newConfig.datasetLocation = cfg.datasetLocation; + newConfig.tableName = cfg.tableName; + newConfig.sourceUri = cfg.sourceUri; + newConfig.sourceUriPrefix = cfg.sourceUriPrefix; + newConfig.basePath = cfg.basePath; + newConfig.partitionFields = cfg.partitionFields; + newConfig.useFileListingFromMetadata = cfg.useFileListingFromMetadata; + newConfig.assumeDatePartitioning = cfg.assumeDatePartitioning; + newConfig.help = cfg.help; + return newConfig; + } + + public TypedProperties toProps() { + TypedProperties properties = new TypedProperties(); + properties.put(BIGQUERY_SYNC_PROJECT_ID, projectId); + properties.put(BIGQUERY_SYNC_DATASET_NAME, datasetName); + 
properties.put(BIGQUERY_SYNC_DATASET_LOCATION, datasetLocation); + properties.put(BIGQUERY_SYNC_TABLE_NAME, tableName); + properties.put(BIGQUERY_SYNC_SOURCE_URI, sourceUri); + properties.put(BIGQUERY_SYNC_SOURCE_URI_PREFIX, sourceUriPrefix); + properties.put(BIGQUERY_SYNC_SYNC_BASE_PATH, basePath); + properties.put(BIGQUERY_SYNC_PARTITION_FIELDS, String.join(",", partitionFields)); + properties.put(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA, useFileListingFromMetadata); + properties.put(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING, assumeDatePartitioning); + return properties; + } + + public static BigQuerySyncConfig fromProps(TypedProperties props) { + BigQuerySyncConfig config = new BigQuerySyncConfig(); + config.projectId = props.getString(BIGQUERY_SYNC_PROJECT_ID); + config.datasetName = props.getString(BIGQUERY_SYNC_DATASET_NAME); + config.datasetLocation = props.getString(BIGQUERY_SYNC_DATASET_LOCATION); + config.tableName = props.getString(BIGQUERY_SYNC_TABLE_NAME); + config.sourceUri = props.getString(BIGQUERY_SYNC_SOURCE_URI); + config.sourceUriPrefix = props.getString(BIGQUERY_SYNC_SOURCE_URI_PREFIX); + config.basePath = props.getString(BIGQUERY_SYNC_SYNC_BASE_PATH); + config.partitionFields = props.getStringList(BIGQUERY_SYNC_PARTITION_FIELDS, ",", Collections.emptyList()); + config.useFileListingFromMetadata = props.getBoolean(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA, false); + config.assumeDatePartitioning = props.getBoolean(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING, false); + return config; + } + + @Override + public String toString() { + return "BigQuerySyncConfig{projectId='" + projectId + + "', datasetName='" + datasetName + + "', datasetLocation='" + datasetLocation + + "', tableName='" + tableName + + "', sourceUri='" + sourceUri + + "', sourceUriPrefix='" + sourceUriPrefix + + "', basePath='" + basePath + "'" + + ", partitionFields=" + partitionFields + + "', useFileListingFromMetadata='" + useFileListingFromMetadata + + "', 
assumeDatePartitioning='" + assumeDatePartitioning + + "', help=" + help + "}"; + } +} diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java new file mode 100644 index 0000000000000..0cb75eea89bad --- /dev/null +++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.gcp.bigquery; + +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.sync.common.AbstractSyncTool; +import org.apache.hudi.sync.common.util.ManifestFileWriter; +
import com.beust.jcommander.JCommander; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +/** + * Tool to sync a hoodie table with a big query table.
Either use it as an API + * BigQuerySyncTool.syncHoodieTable(BigQuerySyncConfig) or as a command line tool: java -cp hudi-gcp-bundle.jar BigQuerySyncTool [args] + *

    + * This utility will get the schema from the latest commit and will sync big query table schema. + * + * @Experimental + */ +public class BigQuerySyncTool extends AbstractSyncTool { + + private static final Logger LOG = LogManager.getLogger(BigQuerySyncTool.class); + + public final BigQuerySyncConfig cfg; + public final String manifestTableName; + public final String versionsTableName; + public final String snapshotViewName; + + public BigQuerySyncTool(TypedProperties properties, Configuration conf, FileSystem fs) { + super(properties, conf, fs); + cfg = BigQuerySyncConfig.fromProps(properties); + manifestTableName = cfg.tableName + "_manifest"; + versionsTableName = cfg.tableName + "_versions"; + snapshotViewName = cfg.tableName; + } + + @Override + public void syncHoodieTable() { + try (HoodieBigQuerySyncClient bqSyncClient = new HoodieBigQuerySyncClient(BigQuerySyncConfig.fromProps(props), fs)) { + switch (bqSyncClient.getTableType()) { + case COPY_ON_WRITE: + syncCoWTable(bqSyncClient); + break; + case MERGE_ON_READ: + default: + throw new UnsupportedOperationException(bqSyncClient.getTableType() + " table type is not supported yet."); + } + } catch (Exception e) { + throw new HoodieBigQuerySyncException("Got runtime exception when big query syncing " + cfg.tableName, e); + } + } + + private void syncCoWTable(HoodieBigQuerySyncClient bqSyncClient) { + ValidationUtils.checkState(bqSyncClient.getTableType() == HoodieTableType.COPY_ON_WRITE); + LOG.info("Sync hoodie table " + snapshotViewName + " at base path " + bqSyncClient.getBasePath()); + + if (!bqSyncClient.datasetExists()) { + throw new HoodieBigQuerySyncException("Dataset not found: " + cfg); + } + + ManifestFileWriter manifestFileWriter = ManifestFileWriter.builder() + .setConf(conf) + .setBasePath(cfg.basePath) + .setUseFileListingFromMetadata(cfg.useFileListingFromMetadata) + .setAssumeDatePartitioning(cfg.assumeDatePartitioning) + .build(); + manifestFileWriter.writeManifestFile(); + + if 
(!bqSyncClient.tableExists(manifestTableName)) { + bqSyncClient.createManifestTable(manifestTableName, manifestFileWriter.getManifestSourceUri()); + LOG.info("Manifest table creation complete for " + manifestTableName); + } + if (!bqSyncClient.tableExists(versionsTableName)) { + bqSyncClient.createVersionsTable(versionsTableName, cfg.sourceUri, cfg.sourceUriPrefix, cfg.partitionFields); + LOG.info("Versions table creation complete for " + versionsTableName); + } + if (!bqSyncClient.tableExists(snapshotViewName)) { + bqSyncClient.createSnapshotView(snapshotViewName, versionsTableName, manifestTableName); + LOG.info("Snapshot view creation complete for " + snapshotViewName); + } + + // TODO: Implement automatic schema evolution when you add a new column. + LOG.info("Sync table complete for " + snapshotViewName); + } + + public static void main(String[] args) { + BigQuerySyncConfig cfg = new BigQuerySyncConfig(); + JCommander cmd = new JCommander(cfg, null, args); + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + FileSystem fs = FSUtils.getFs(cfg.basePath, new Configuration()); + new BigQuerySyncTool(cfg.toProps(), fs.getConf(), fs).syncHoodieTable(); + } +} diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java new file mode 100644 index 0000000000000..cb41ca22724c3 --- /dev/null +++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.gcp.bigquery; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.sync.common.AbstractSyncHoodieClient; + +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.BigQueryException; +import com.google.cloud.bigquery.BigQueryOptions; +import com.google.cloud.bigquery.CsvOptions; +import com.google.cloud.bigquery.Dataset; +import com.google.cloud.bigquery.DatasetId; +import com.google.cloud.bigquery.ExternalTableDefinition; +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.FormatOptions; +import com.google.cloud.bigquery.HivePartitioningOptions; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.Table; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableInfo; +import com.google.cloud.bigquery.ViewDefinition; +import org.apache.hadoop.fs.FileSystem; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.parquet.schema.MessageType; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class HoodieBigQuerySyncClient extends AbstractSyncHoodieClient { + private static final Logger LOG = LogManager.getLogger(HoodieBigQuerySyncClient.class); + + private final BigQuerySyncConfig syncConfig; + private transient BigQuery bigquery; + + public HoodieBigQuerySyncClient(final BigQuerySyncConfig syncConfig, final FileSystem fs) { + super(syncConfig.basePath, syncConfig.assumeDatePartitioning, 
syncConfig.useFileListingFromMetadata, + false, fs); + this.syncConfig = syncConfig; + this.createBigQueryConnection(); + } + + private void createBigQueryConnection() { + if (bigquery == null) { + try { + // Initialize client that will be used to send requests. This client only needs to be created + // once, and can be reused for multiple requests. + bigquery = BigQueryOptions.newBuilder().setLocation(syncConfig.datasetLocation).build().getService(); + LOG.info("Successfully established BigQuery connection."); + } catch (BigQueryException e) { + throw new HoodieBigQuerySyncException("Cannot create bigQuery connection ", e); + } + } + } + + @Override + public void createTable(final String tableName, final MessageType storageSchema, final String inputFormatClass, + final String outputFormatClass, final String serdeClass, + final Map serdeProperties, final Map tableProperties) { + // bigQuery create table arguments are different, so do nothing. + } + + public void createManifestTable(String tableName, String sourceUri) { + try { + TableId tableId = TableId.of(syncConfig.projectId, syncConfig.datasetName, tableName); + CsvOptions csvOptions = CsvOptions.newBuilder() + .setFieldDelimiter(",") + .setAllowJaggedRows(false) + .setAllowQuotedNewLines(false) + .setSkipLeadingRows(0) + .build(); + Schema schema = Schema.of( + Field.of("filename", StandardSQLTypeName.STRING)); + + ExternalTableDefinition customTable = + ExternalTableDefinition.newBuilder(sourceUri, schema, csvOptions) + .setAutodetect(false) + .setIgnoreUnknownValues(false) + .setMaxBadRecords(0) + .build(); + bigquery.create(TableInfo.of(tableId, customTable)); + LOG.info("Manifest External table created."); + } catch (BigQueryException e) { + throw new HoodieBigQuerySyncException("Manifest External table was not created ", e); + } + } + + public void createVersionsTable(String tableName, String sourceUri, String sourceUriPrefix, List partitionFields) { + try { + ExternalTableDefinition customTable; + 
TableId tableId = TableId.of(syncConfig.projectId, syncConfig.datasetName, tableName); + + if (partitionFields.isEmpty()) { + customTable = + ExternalTableDefinition.newBuilder(sourceUri, FormatOptions.parquet()) + .setAutodetect(true) + .setIgnoreUnknownValues(true) + .setMaxBadRecords(0) + .build(); + } else { + // Configuring partitioning options for partitioned table. + HivePartitioningOptions hivePartitioningOptions = + HivePartitioningOptions.newBuilder() + .setMode("AUTO") + .setRequirePartitionFilter(false) + .setSourceUriPrefix(sourceUriPrefix) + .build(); + customTable = + ExternalTableDefinition.newBuilder(sourceUri, FormatOptions.parquet()) + .setAutodetect(true) + .setHivePartitioningOptions(hivePartitioningOptions) + .setIgnoreUnknownValues(true) + .setMaxBadRecords(0) + .build(); + } + + bigquery.create(TableInfo.of(tableId, customTable)); + LOG.info("External table created using hivepartitioningoptions"); + } catch (BigQueryException e) { + throw new HoodieBigQuerySyncException("External table was not created ", e); + } + } + + public void createSnapshotView(String viewName, String versionsTableName, String manifestTableName) { + try { + TableId tableId = TableId.of(syncConfig.projectId, syncConfig.datasetName, viewName); + String query = + String.format( + "SELECT * FROM `%s.%s.%s` WHERE _hoodie_file_name IN " + + "(SELECT filename FROM `%s.%s.%s`)", + syncConfig.projectId, + syncConfig.datasetName, + versionsTableName, + syncConfig.projectId, + syncConfig.datasetName, + manifestTableName); + + ViewDefinition viewDefinition = + ViewDefinition.newBuilder(query).setUseLegacySql(false).build(); + + bigquery.create(TableInfo.of(tableId, viewDefinition)); + LOG.info("View created successfully"); + } catch (BigQueryException e) { + throw new HoodieBigQuerySyncException("View was not created ", e); + } + } + + @Override + public Map getTableSchema(String tableName) { + // TODO: Implement automatic schema evolution when you add a new column. 
+ return Collections.emptyMap(); + } + + @Override + public void addPartitionsToTable(final String tableName, final List partitionsToAdd) { + // BigQuery discovers new partitions automatically, so explicit partition addition is unsupported. + throw new UnsupportedOperationException("No support for addPartitionsToTable yet."); + } + + public boolean datasetExists() { + Dataset dataset = bigquery.getDataset(DatasetId.of(syncConfig.projectId, syncConfig.datasetName)); + return dataset != null; + } + + @Override + public boolean doesTableExist(final String tableName) { + return tableExists(tableName); + } + + @Override + public boolean tableExists(String tableName) { + TableId tableId = TableId.of(syncConfig.projectId, syncConfig.datasetName, tableName); + Table table = bigquery.getTable(tableId, BigQuery.TableOption.fields()); + return table != null && table.exists(); + } + + @Override + public Option getLastCommitTimeSynced(final String tableName) { + // BigQuery doesn't support tblproperties, so this is unsupported. + throw new UnsupportedOperationException("Not support getLastCommitTimeSynced yet."); + } + + @Override + public void updateLastCommitTimeSynced(final String tableName) { + // BigQuery doesn't support tblproperties, so this is unsupported. + throw new UnsupportedOperationException("No support for updateLastCommitTimeSynced yet."); + } + + @Override + public Option getLastReplicatedTime(String tableName) { + // BigQuery doesn't support tblproperties, so this is unsupported. + throw new UnsupportedOperationException("Not support getLastReplicatedTime yet."); + } + + @Override + public void updateLastReplicatedTimeStamp(String tableName, String timeStamp) { + // BigQuery doesn't support tblproperties, so this is unsupported. + throw new UnsupportedOperationException("No support for updateLastReplicatedTimeStamp yet."); + } + + @Override + public void deleteLastReplicatedTimeStamp(String tableName) { + // BigQuery doesn't support tblproperties, so this is unsupported.
+ throw new UnsupportedOperationException("No support for deleteLastReplicatedTimeStamp yet."); + } + + @Override + public void updatePartitionsToTable(final String tableName, final List changedPartitions) { + // bigQuery updates the partitions automatically, so do nothing. + throw new UnsupportedOperationException("No support for updatePartitionsToTable yet."); + } + + @Override + public void dropPartitions(String tableName, List partitionsToDrop) { + // bigQuery discovers the new partitions automatically, so do nothing. + throw new UnsupportedOperationException("No support for dropPartitions yet."); + } + + @Override + public void close() { + // bigQuery has no connection close method, so do nothing. + } +} diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/columnstats/TestColumnStatsIndexHelper.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncException.java similarity index 56% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/columnstats/TestColumnStatsIndexHelper.java rename to hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncException.java index 3901a93783902..4d30b2faa1d9b 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/columnstats/TestColumnStatsIndexHelper.java +++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncException.java @@ -17,25 +17,27 @@ * under the License. 
*/ -package org.apache.hudi.index.columnstats; +package org.apache.hudi.gcp.bigquery; -import org.junit.jupiter.api.Test; +public class HoodieBigQuerySyncException extends RuntimeException { -import java.util.Arrays; + public HoodieBigQuerySyncException() { + super(); + } -import static org.junit.jupiter.api.Assertions.assertEquals; + public HoodieBigQuerySyncException(String message) { + super(message); + } -public class TestColumnStatsIndexHelper { + public HoodieBigQuerySyncException(String message, Throwable t) { + super(message, t); + } - @Test - public void testMergeSql() { - String q = ColumnStatsIndexHelper.createIndexMergeSql("old", "new", Arrays.asList("file", "a", "b")); - assertEquals( - "SELECT " - + "if (new.file is null, old.file, new.file) AS file, " - + "if (new.a is null, old.a, new.a) AS a, " - + "if (new.b is null, old.b, new.b) AS b " - + "FROM old FULL JOIN new ON old.file = new.file", q); + public HoodieBigQuerySyncException(Throwable t) { + super(t); } + protected static String format(String message, Object... args) { + return String.format(String.valueOf(message), args); + } } diff --git a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncConfig.java b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncConfig.java new file mode 100644 index 0000000000000..8b3250ccde0f6 --- /dev/null +++ b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncConfig.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.gcp.bigquery; + +import org.apache.hudi.common.config.TypedProperties; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; + +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_LOCATION; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_NAME; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PARTITION_FIELDS; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PROJECT_ID; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI_PREFIX; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SYNC_BASE_PATH; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_TABLE_NAME; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestBigQuerySyncConfig { + + BigQuerySyncConfig syncConfig; + + @BeforeEach + void setUp() { + syncConfig = new BigQuerySyncConfig(); + syncConfig.projectId = "fooproject"; + syncConfig.datasetName = "foodataset"; + syncConfig.datasetLocation = "US"; + syncConfig.tableName = "footable"; + syncConfig.sourceUri = 
"gs://test-bucket/dwh/table_name/dt=*"; + syncConfig.sourceUriPrefix = "gs://test-bucket/dwh/table_name/"; + syncConfig.basePath = "gs://test-bucket/dwh/table_name"; + syncConfig.partitionFields = Arrays.asList("a", "b"); + syncConfig.useFileListingFromMetadata = true; + syncConfig.assumeDatePartitioning = true; + syncConfig.help = true; + } + + @Test + public void testCopy() { + BigQuerySyncConfig copied = BigQuerySyncConfig.copy(syncConfig); + assertEquals(copied.partitionFields, syncConfig.partitionFields); + assertEquals(copied.basePath, syncConfig.basePath); + assertEquals(copied.projectId, syncConfig.projectId); + assertEquals(copied.datasetName, syncConfig.datasetName); + assertEquals(copied.datasetLocation, syncConfig.datasetLocation); + assertEquals(copied.tableName, syncConfig.tableName); + assertEquals(copied.sourceUri, syncConfig.sourceUri); + assertEquals(copied.sourceUriPrefix, syncConfig.sourceUriPrefix); + assertEquals(copied.useFileListingFromMetadata, syncConfig.useFileListingFromMetadata); + assertEquals(copied.assumeDatePartitioning, syncConfig.assumeDatePartitioning); + assertEquals(copied.help, syncConfig.help); + } + + @Test + public void testToProps() { + TypedProperties props = syncConfig.toProps(); + assertEquals("fooproject", props.getString(BIGQUERY_SYNC_PROJECT_ID)); + assertEquals("foodataset", props.getString(BIGQUERY_SYNC_DATASET_NAME)); + assertEquals("US", props.getString(BIGQUERY_SYNC_DATASET_LOCATION)); + assertEquals("footable", props.getString(BIGQUERY_SYNC_TABLE_NAME)); + assertEquals("gs://test-bucket/dwh/table_name/dt=*", props.getString(BIGQUERY_SYNC_SOURCE_URI)); + assertEquals("gs://test-bucket/dwh/table_name/", props.getString(BIGQUERY_SYNC_SOURCE_URI_PREFIX)); + assertEquals("gs://test-bucket/dwh/table_name", props.getString(BIGQUERY_SYNC_SYNC_BASE_PATH)); + assertEquals("a,b", props.getString(BIGQUERY_SYNC_PARTITION_FIELDS)); + assertEquals("true", props.getString(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA)); + 
assertEquals("true", props.getString(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING)); + } + + @Test + public void fromProps() { + TypedProperties props = new TypedProperties(); + props.put(BIGQUERY_SYNC_PROJECT_ID, "fooproject"); + props.put(BIGQUERY_SYNC_DATASET_NAME, "foodataset"); + props.put(BIGQUERY_SYNC_DATASET_LOCATION, "US"); + props.put(BIGQUERY_SYNC_TABLE_NAME, "footable"); + props.put(BIGQUERY_SYNC_SOURCE_URI, "gs://test-bucket/dwh/table_name/dt=*"); + props.put(BIGQUERY_SYNC_SOURCE_URI_PREFIX, "gs://test-bucket/dwh/table_name/"); + props.put(BIGQUERY_SYNC_SYNC_BASE_PATH, "gs://test-bucket/dwh/table_name"); + props.put(BIGQUERY_SYNC_PARTITION_FIELDS, "a,b"); + props.put(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA, true); + props.put(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING, true); + BigQuerySyncConfig cfg = BigQuerySyncConfig.fromProps(props); + + assertEquals(syncConfig.projectId, cfg.projectId); + assertEquals(syncConfig.datasetName, cfg.datasetName); + assertEquals(syncConfig.datasetLocation, cfg.datasetLocation); + assertEquals(syncConfig.tableName, cfg.tableName); + assertEquals(syncConfig.sourceUri, cfg.sourceUri); + assertEquals(syncConfig.sourceUriPrefix, cfg.sourceUriPrefix); + assertEquals(syncConfig.basePath, cfg.basePath); + assertEquals(syncConfig.partitionFields, cfg.partitionFields); + assertEquals(syncConfig.useFileListingFromMetadata, cfg.useFileListingFromMetadata); + assertEquals(syncConfig.assumeDatePartitioning, cfg.assumeDatePartitioning); + } +} diff --git a/hudi-gcp/src/test/resources/log4j-surefire-quiet.properties b/hudi-gcp/src/test/resources/log4j-surefire-quiet.properties new file mode 100644 index 0000000000000..78d6cfe849883 --- /dev/null +++ b/hudi-gcp/src/test/resources/log4j-surefire-quiet.properties @@ -0,0 +1,29 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### +log4j.rootLogger=ERROR, CONSOLE +log4j.logger.org.apache.hudi=ERROR + +# CONSOLE is set to be a ConsoleAppender. +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +# CONSOLE uses PatternLayout. +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c %x - %m%n +log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter +log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true +log4j.appender.CONSOLE.filter.a.LevelMin=WARN +log4j.appender.CONSOLE.filter.a.LevelMax=FATAL diff --git a/hudi-gcp/src/test/resources/log4j-surefire.properties b/hudi-gcp/src/test/resources/log4j-surefire.properties new file mode 100644 index 0000000000000..7914f0a78273b --- /dev/null +++ b/hudi-gcp/src/test/resources/log4j-surefire.properties @@ -0,0 +1,29 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### +log4j.rootLogger=WARN, CONSOLE +log4j.logger.org.apache.hudi=INFO + +# A1 is set to be a ConsoleAppender. +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +# A1 uses PatternLayout. +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n +log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter +log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true +log4j.appender.CONSOLE.filter.a.LevelMin=WARN +log4j.appender.CONSOLE.filter.a.LevelMax=FATAL diff --git a/hudi-hadoop-mr/pom.xml b/hudi-hadoop-mr/pom.xml index bf87bfaa36a81..a2a83658c1447 100644 --- a/hudi-hadoop-mr/pom.xml +++ b/hudi-hadoop-mr/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java index b917f004bcd06..9618f5f7caded 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java @@ -18,12 +18,6 @@ package org.apache.hudi.hadoop.realtime; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; import 
org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.config.HoodieCommonConfig; import org.apache.hudi.common.fs.FSUtils; @@ -35,15 +29,27 @@ import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils; + +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.IOException; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; +import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath; +import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty; + class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader implements RecordReader { @@ -77,10 +83,11 @@ private HoodieMergedLogRecordScanner getMergedLogRecordScanner() throws IOExcept // NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit // but can return records for completed commits > the commit we are trying to read (if using // readCommit() API) - return HoodieMergedLogRecordScanner.newBuilder() + List logPaths = split.getDeltaLogPaths(); + HoodieMergedLogRecordScanner.Builder logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder() .withFileSystem(FSUtils.getFs(split.getPath().toString(), jobConf)) .withBasePath(split.getBasePath()) - .withLogFilePaths(split.getDeltaLogPaths()) + .withLogFilePaths(logPaths) .withReaderSchema(usesCustomPayload ? 
getWriterSchema() : getReaderSchema()) .withLatestInstantTime(split.getMaxCommitTime()) .withMaxMemorySizeInBytes(HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes(jobConf)) @@ -90,8 +97,12 @@ private HoodieMergedLogRecordScanner getMergedLogRecordScanner() throws IOExcept .withSpillableMapBasePath(jobConf.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH)) .withDiskMapType(jobConf.getEnum(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.key(), HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())) .withBitCaskDiskMapCompressionEnabled(jobConf.getBoolean(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.key(), - HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())) - .build(); + HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())); + if (!isNullOrEmpty(logPaths)) { + logRecordScannerBuilder + .withPartition(getRelativePartitionPath(new Path(split.getBasePath()), new Path(logPaths.get(0)).getParent())); + } + return logRecordScannerBuilder.build(); } private Option buildGenericRecordwithCustomPayload(HoodieRecord record) throws IOException { diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java index fa2bce4875379..cbfd197f43897 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java @@ -71,7 +71,6 @@ public class HoodieHiveUtils { public static final String DEFAULT_SCAN_MODE = SNAPSHOT_SCAN_MODE; public static final int DEFAULT_MAX_COMMITS = 1; public static final int MAX_COMMIT_ALL = -1; - public static final int DEFAULT_LEVELS_TO_BASEPATH = 3; public static final Pattern HOODIE_CONSUME_MODE_PATTERN_STRING = Pattern.compile("hoodie\\.(.*)\\.consume\\.mode"); public static final String 
GLOBALLY_CONSISTENT_READ_TIMESTAMP = "last_replication_timestamp"; diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java index 7fec1fb63f6fa..7b47ffa75f3d2 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java @@ -46,6 +46,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.exception.TableNotFoundException; import org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile; import org.apache.hudi.hadoop.HoodieHFileInputFormat; import org.apache.hudi.hadoop.HoodieParquetInputFormat; @@ -68,6 +69,7 @@ import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS; import static org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE; +import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME; public class HoodieInputFormatUtils { @@ -324,14 +326,24 @@ public static Map getTableMetaClientByPartitionPath * Extract HoodieTableMetaClient from a partition path (not base path) */ public static HoodieTableMetaClient getTableMetaClientForBasePathUnchecked(Configuration conf, Path partitionPath) throws IOException { + Path baseDir = partitionPath; FileSystem fs = partitionPath.getFileSystem(conf); - int levels = HoodieHiveUtils.DEFAULT_LEVELS_TO_BASEPATH; if (HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)) { HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, partitionPath); metadata.readFromFS(); - levels = metadata.getPartitionDepth(); + int levels = metadata.getPartitionDepth(); + baseDir = HoodieHiveUtils.getNthParent(partitionPath, levels); + } else { + for (int i = 0; i < partitionPath.depth(); i++) 
{ + if (fs.exists(new Path(baseDir, METAFOLDER_NAME))) { + break; + } else if (i == partitionPath.depth() - 1) { + throw new TableNotFoundException(partitionPath.toString()); + } else { + baseDir = baseDir.getParent(); + } + } } - Path baseDir = HoodieHiveUtils.getNthParent(partitionPath, levels); LOG.info("Reading hoodie metadata from path " + baseDir.toString()); return HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(baseDir.toString()).build(); } diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java index 67d15f9b916e1..92bf6f3ca718c 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java @@ -162,6 +162,21 @@ public void testInputFormatLoad() throws IOException { assertEquals(10, inputSplits.length); } + @Test + public void testInputFormatLoadWithEmptyTable() throws IOException { + // initial hoodie table + String bathPathStr = "/tmp/test_empty_table"; + HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), bathPathStr, HoodieTableType.COPY_ON_WRITE, + baseFileFormat); + // Add the paths + FileInputFormat.setInputPaths(jobConf, bathPathStr); + + FileStatus[] files = inputFormat.listStatus(jobConf); + assertEquals(0, files.length); + InputSplit[] inputSplits = inputFormat.getSplits(jobConf, 0); + assertEquals(0, inputSplits.length); + } + @Test public void testInputFormatUpdates() throws IOException { // initial commit diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java index 2ae7c36d98e7e..10ea84f5dca11 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java +++ 
b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java @@ -167,6 +167,21 @@ public void testInputFormatLoad() throws IOException { assertEquals(10, files.length); } + @Test + public void testInputFormatLoadWithEmptyTable() throws IOException { + // initial hoodie table + String bathPathStr = "/tmp/test_empty_table"; + HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), bathPathStr, HoodieTableType.COPY_ON_WRITE, + baseFileFormat); + // Add the paths + FileInputFormat.setInputPaths(jobConf, bathPathStr); + + FileStatus[] files = inputFormat.listStatus(jobConf); + assertEquals(0, files.length); + InputSplit[] inputSplits = inputFormat.getSplits(jobConf, 0); + assertEquals(0, inputSplits.length); + } + @Test public void testInputFormatUpdates() throws IOException { // initial commit diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestInputPathHandler.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestInputPathHandler.java index 3966aa339346f..c978cf1419977 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestInputPathHandler.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestInputPathHandler.java @@ -56,6 +56,12 @@ public class TestInputPathHandler { // non Hoodie table public static final String TRIPS_STATS_TEST_NAME = "trips_stats"; + // empty snapshot table + public static final String EMPTY_SNAPSHOT_TEST_NAME = "empty_snapshot"; + + // empty incremental table + public static final String EMPTY_INCREMENTAL_TEST_NAME = "empty_incremental"; + @TempDir static java.nio.file.Path parentPath; @@ -67,6 +73,8 @@ public class TestInputPathHandler { private static String basePathTable2 = null; private static String basePathTable3 = null; private static String basePathTable4 = null; // non hoodie Path + private static String basePathTable5 = null; + private static String basePathTable6 = null; private static List incrementalTables; private static List incrementalPaths; 
private static List snapshotPaths; @@ -110,6 +118,9 @@ static void initTables() throws IOException { basePathTable2 = parentPath.resolve(MODEL_TRIPS_TEST_NAME).toAbsolutePath().toString(); basePathTable3 = parentPath.resolve(ETL_TRIPS_TEST_NAME).toAbsolutePath().toString(); basePathTable4 = parentPath.resolve(TRIPS_STATS_TEST_NAME).toAbsolutePath().toString(); + String tempPath = "/tmp/"; + basePathTable5 = tempPath + EMPTY_SNAPSHOT_TEST_NAME; + basePathTable6 = tempPath + EMPTY_INCREMENTAL_TEST_NAME; dfs.mkdirs(new Path(basePathTable1)); initTableType(dfs.getConf(), basePathTable1, RAW_TRIPS_TEST_NAME, HoodieTableType.MERGE_ON_READ); @@ -126,6 +137,12 @@ static void initTables() throws IOException { dfs.mkdirs(new Path(basePathTable4)); nonHoodiePaths.addAll(generatePartitions(dfs, basePathTable4)); + initTableType(dfs.getConf(), basePathTable5, EMPTY_SNAPSHOT_TEST_NAME, HoodieTableType.COPY_ON_WRITE); + snapshotPaths.add(new Path(basePathTable5)); + + initTableType(dfs.getConf(), basePathTable6, EMPTY_INCREMENTAL_TEST_NAME, HoodieTableType.MERGE_ON_READ); + incrementalPaths.add(new Path(basePathTable6)); + inputPaths.addAll(incrementalPaths); inputPaths.addAll(snapshotPaths); inputPaths.addAll(nonHoodiePaths); @@ -133,6 +150,7 @@ static void initTables() throws IOException { incrementalTables = new ArrayList<>(); incrementalTables.add(RAW_TRIPS_TEST_NAME); incrementalTables.add(MODEL_TRIPS_TEST_NAME); + incrementalTables.add(EMPTY_INCREMENTAL_TEST_NAME); } static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath, diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java index 07a4a0250e5de..74b7120fd0a5f 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java +++ 
b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java @@ -18,29 +18,6 @@ package org.apache.hudi.hadoop.realtime; -import org.apache.avro.Schema; -import org.apache.avro.Schema.Field; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.io.DoubleWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.common.config.HoodieCommonConfig; @@ -68,6 +45,30 @@ import org.apache.hudi.hadoop.RealtimeFileStatus; import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; import org.apache.hudi.hadoop.testutils.InputFormatTestUtil; + +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; 
+import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -78,6 +79,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.net.URI; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; @@ -106,9 +108,11 @@ public class TestHoodieRealtimeRecordReader { @BeforeEach public void setUp() { hadoopConf = HoodieTestUtils.getDefaultHadoopConf(); + hadoopConf.set("fs.defaultFS", "file:///"); + hadoopConf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); baseJobConf = new JobConf(hadoopConf); baseJobConf.set(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1024 * 1024)); - fs = FSUtils.getFs(basePath.toString(), baseJobConf); + fs = FSUtils.getFs(basePath.toUri().toString(), baseJobConf); } @TempDir @@ -810,13 +814,14 @@ private void createDeltaCommitFile( public void testLogOnlyReader() throws Exception { // initial commit Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema()); - HoodieTestUtils.init(hadoopConf, basePath.toString(), HoodieTableType.MERGE_ON_READ); + URI baseUri = basePath.toUri(); + HoodieTestUtils.init(hadoopConf, baseUri.toString(), HoodieTableType.MERGE_ON_READ); String baseInstant = "100"; File partitionDir = 
InputFormatTestUtil.prepareNonPartitionedParquetTable(basePath, schema, 1, 100, baseInstant, HoodieTableType.MERGE_ON_READ); FileCreateUtils.createDeltaCommit(basePath.toString(), baseInstant); // Add the paths - FileInputFormat.setInputPaths(baseJobConf, partitionDir.getPath()); + FileInputFormat.setInputPaths(baseJobConf, partitionDir.toURI().toString()); FileSlice fileSlice = new FileSlice("default", baseInstant, "fileid1"); try { @@ -836,7 +841,7 @@ public void testLogOnlyReader() throws Exception { fileSlice.addLogFile(new HoodieLogFile(writer.getLogFile().getPath(), size)); RealtimeFileStatus realtimeFileStatus = new RealtimeFileStatus( new FileStatus(writer.getLogFile().getFileSize(), false, 1, 1, 0, writer.getLogFile().getPath()), - basePath.toString(), + baseUri.toString(), fileSlice.getLogFiles().collect(Collectors.toList()), false, Option.empty()); diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java index 1185be65c196e..ccd85d382930a 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java @@ -18,9 +18,6 @@ package org.apache.hudi.hadoop.testutils; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.RawLocalFileSystem; -import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieFileFormat; @@ -37,6 +34,7 @@ import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.SchemaTestUtil; +import org.apache.hudi.common.util.Option; import org.apache.hudi.hadoop.utils.HoodieHiveUtils; import org.apache.avro.Schema; @@ -44,7 +42,10 @@ import 
org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.mapred.JobConf; @@ -185,7 +186,7 @@ public static void setupSnapshotMaxCommitTimeQueryMode(JobConf jobConf, String m public static void setupSnapshotScanMode(JobConf jobConf) { setupSnapshotScanMode(jobConf, false); } - + private static void setupSnapshotScanMode(JobConf jobConf, boolean includePending) { setUpScanMode(jobConf); String includePendingCommitsName = @@ -373,7 +374,8 @@ public static HoodieLogFormat.Writer writeDataBlockToLogFile(File partitionDir, header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writeSchema.toString()); HoodieDataBlock dataBlock = null; if (logBlockType == HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK) { - dataBlock = new HoodieHFileDataBlock(records, header, Compression.Algorithm.GZ); + dataBlock = new HoodieHFileDataBlock( + records, header, Compression.Algorithm.GZ, writer.getLogFile().getPath()); } else if (logBlockType == HoodieLogBlock.HoodieLogBlockType.PARQUET_DATA_BLOCK) { dataBlock = new HoodieParquetDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD, CompressionCodecName.GZIP); } else { @@ -466,8 +468,8 @@ private static void setupPartition(java.nio.file.Path basePath, java.nio.file.Pa new LocalFileSystem(lfs), "0", new Path(basePath.toAbsolutePath().toString()), - new Path(partitionPath.toAbsolutePath().toString()) - ); + new Path(partitionPath.toAbsolutePath().toString()), + Option.of(HoodieFileFormat.PARQUET)); partitionMetadata.trySave((int) (Math.random() * 1000)); } diff --git a/hudi-integ-test/README.md b/hudi-integ-test/README.md index 
7ee4598ba3bcb..6c1bad138cc18 100644 --- a/hudi-integ-test/README.md +++ b/hudi-integ-test/README.md @@ -126,7 +126,7 @@ NOTE : The properties-file should have all the necessary information required to information on what properties need to be set, take a look at the test suite section under demo steps. ``` shell$ ./prepare_integration_suite.sh --spark-command -spark-submit --packages com.databricks:spark-avro_2.11:4.0.0 --master prepare_integration_suite.sh --deploy-mode +spark-submit --master prepare_integration_suite.sh --deploy-mode --properties-file --class org.apache.hudi.integ.testsuite.HoodieTestSuiteJob target/hudi-integ-test-0.6 .0-SNAPSHOT.jar --source-class --source-ordering-field --input-base-path --target-base-path --target-table --props --storage-type --payload-class --workload-yaml-path --input-file-size -- ``` @@ -198,7 +198,6 @@ Launch a Copy-on-Write job: ========================= ## Run the following command to start the test suite spark-submit \ ---packages org.apache.spark:spark-avro_2.11:2.4.0 \ --conf spark.task.cpus=1 \ --conf spark.executor.cores=1 \ --conf spark.task.maxFailures=100 \ @@ -245,7 +244,6 @@ Or a Merge-on-Read job: ========================= ## Run the following command to start the test suite spark-submit \ ---packages org.apache.spark:spark-avro_2.11:2.4.0 \ --conf spark.task.cpus=1 \ --conf spark.executor.cores=1 \ --conf spark.task.maxFailures=100 \ @@ -438,7 +436,6 @@ docker exec -it adhoc-2 /bin/bash Sample COW command ``` spark-submit \ ---packages org.apache.spark:spark-avro_2.11:2.4.0 \ --conf spark.task.cpus=1 \ --conf spark.executor.cores=1 \ --conf spark.task.maxFailures=100 \ diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index 08affb5e48dee..17e05e3c3dec0 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -21,7 +21,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../pom.xml hudi-integ-test diff --git 
a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieMultiWriterTestSuiteJob.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieMultiWriterTestSuiteJob.java new file mode 100644 index 0000000000000..6cff499825566 --- /dev/null +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieMultiWriterTestSuiteJob.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.integ.testsuite; + +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.utilities.UtilHelpers; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Multi write test suite job to assist in testing multi-writer scenarios. This test spins up one thread per writer as per configurations. + * Three params are of interest to this job in addition to regular HoodieTestsuiteJob. + * --input-base-paths "base_path/input1,base_path/input2" + * --props-paths "file:props_path/multi-writer-1.properties,file:/props_path/multi-writer-2.properties" + * --workload-yaml-paths "file:some_path/multi-writer-1-ds.yaml,file:/some_path/multi-writer-2-sds.yaml" + * + * Each of these should have same number of comma separated entries. + * Each writer will generate data in the corresponding input-base-path. + * and each writer will take in its own properties path and the respective yaml file as well. + * + * Common tests: + * Writer 1 DeltaStreamer ingesting data into partitions 0 to 10, Writer 2 Spark datasource ingesting data into partitions 100 to 110. + * Multiple spark datasource writers, each writing to exclusive set of partitions. 
+ * + * Example comamnd + * spark-submit + * --packages org.apache.spark:spark-avro_2.11:2.4.0 + * --conf spark.task.cpus=3 + * --conf spark.executor.cores=3 + * --conf spark.task.maxFailures=100 + * --conf spark.memory.fraction=0.4 + * --conf spark.rdd.compress=true + * --conf spark.kryoserializer.buffer.max=2000m + * --conf spark.serializer=org.apache.spark.serializer.KryoSerializer + * --conf spark.memory.storageFraction=0.1 + * --conf spark.shuffle.service.enabled=true + * --conf spark.sql.hive.convertMetastoreParquet=false + * --conf spark.driver.maxResultSize=12g + * --conf spark.executor.heartbeatInterval=120s + * --conf spark.network.timeout=600s + * --conf spark.yarn.max.executor.failures=10 + * --conf spark.sql.catalogImplementation=hive + * --conf spark.driver.extraClassPath=/var/demo/jars/* + * --conf spark.executor.extraClassPath=/var/demo/jars/* + * --class org.apache.hudi.integ.testsuite.HoodieMultiWriterTestSuiteJob /opt/hudi-integ-test-bundle-0.11.0-SNAPSHOT.jar + * --source-ordering-field test_suite_source_ordering_field + * --use-deltastreamer + * --target-base-path /user/hive/warehouse/hudi-integ-test-suite/output + * --input-base-paths "/user/hive/warehouse/hudi-integ-test-suite/input1,/user/hive/warehouse/hudi-integ-test-suite/input2" + * --target-table hudi_table + * --props-paths "multi-writer-1.properties,multi-writer-2.properties" + * --schemaprovider-class org.apache.hudi.integ.testsuite.schema.TestSuiteFileBasedSchemaProvider + * --source-class org.apache.hudi.utilities.sources.AvroDFSSource --input-file-size 125829120 + * --workload-yaml-paths "file:/opt/multi-writer-1-ds.yaml,file:/opt/multi-writer-2-sds.yaml" + * --workload-generator-classname org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator + * --table-type COPY_ON_WRITE --compact-scheduling-minshare 1 + * --input-base-path "dummyValue" + * --workload-yaml-path "dummyValue" + * --props "dummyValue" + * --use-hudi-data-to-generate-updates + * + * Example command that works w/ 
docker. + * + */ +public class HoodieMultiWriterTestSuiteJob { + + private static final Logger LOG = LogManager.getLogger(HoodieMultiWriterTestSuiteJob.class); + + public static void main(String[] args) throws Exception { + final HoodieMultiWriterTestSuiteConfig cfg = new HoodieMultiWriterTestSuiteConfig(); + JCommander cmd = new JCommander(cfg, args); + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + + JavaSparkContext jssc = UtilHelpers.buildSparkContext("multi-writer-test-run-" + cfg.outputTypeName + + "-" + cfg.inputFormatName, cfg.sparkMaster); + + String[] inputPaths = cfg.inputBasePaths.split(","); + String[] yamls = cfg.workloadYamlPaths.split(","); + String[] propsFiles = cfg.propsFilePaths.split(","); + + if (inputPaths.length != yamls.length || yamls.length != propsFiles.length) { + throw new HoodieException("Input paths, property file and yaml file counts does not match "); + } + + ExecutorService executor = Executors.newFixedThreadPool(inputPaths.length); + + List testSuiteConfigList = new ArrayList<>(); + int jobIndex = 0; + for (String inputPath : inputPaths) { + HoodieMultiWriterTestSuiteConfig testSuiteConfig = new HoodieMultiWriterTestSuiteConfig(); + deepCopyConfigs(cfg, testSuiteConfig); + testSuiteConfig.inputBasePath = inputPath; + testSuiteConfig.workloadYamlPath = yamls[jobIndex]; + testSuiteConfig.propsFilePath = propsFiles[jobIndex]; + testSuiteConfigList.add(testSuiteConfig); + jobIndex++; + } + + AtomicBoolean jobFailed = new AtomicBoolean(false); + AtomicInteger counter = new AtomicInteger(0); + List> completableFutureList = new ArrayList<>(); + testSuiteConfigList.forEach(hoodieTestSuiteConfig -> { + try { + // start each job at 20 seconds interval so that metaClient instantiation does not overstep + Thread.sleep(counter.get() * 20000); + LOG.info("Starting job " + hoodieTestSuiteConfig.toString()); + } catch (InterruptedException e) { + e.printStackTrace(); + } + 
completableFutureList.add(CompletableFuture.supplyAsync(() -> { + boolean toReturn = true; + try { + new HoodieTestSuiteJob(hoodieTestSuiteConfig, jssc, false).runTestSuite(); + LOG.info("Job completed successfully"); + } catch (Exception e) { + if (!jobFailed.getAndSet(true)) { + LOG.error("Exception thrown " + e.getMessage() + ", cause : " + e.getCause()); + throw new RuntimeException("HoodieTestSuiteJob Failed " + e.getCause() + ", and msg " + e.getMessage(), e); + } else { + LOG.info("Already a job failed. so, not throwing any exception "); + } + } + return toReturn; + }, executor)); + counter.getAndIncrement(); + }); + + LOG.info("Going to await until all jobs complete"); + try { + CompletableFuture completableFuture = allOfTerminateOnFailure(completableFutureList); + completableFuture.get(); + } finally { + executor.shutdownNow(); + if (jssc != null) { + LOG.info("Completed and shutting down spark context "); + LOG.info("Shutting down spark session and JavaSparkContext"); + SparkSession.builder().config(jssc.getConf()).enableHiveSupport().getOrCreate().stop(); + jssc.close(); + } + } + } + + public static CompletableFuture allOfTerminateOnFailure(List> futures) { + CompletableFuture failure = new CompletableFuture(); + AtomicBoolean jobFailed = new AtomicBoolean(false); + for (CompletableFuture f : futures) { + f.exceptionally(ex -> { + if (!jobFailed.getAndSet(true)) { + System.out.println("One of the job failed. Cancelling all other futures. 
" + ex.getCause() + ", " + ex.getMessage()); + futures.forEach(future -> future.cancel(true)); + } + return null; + }); + } + return CompletableFuture.anyOf(failure, CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))); + } + + static void deepCopyConfigs(HoodieMultiWriterTestSuiteConfig globalConfig, HoodieMultiWriterTestSuiteConfig tableConfig) { + tableConfig.enableHiveSync = globalConfig.enableHiveSync; + tableConfig.enableMetaSync = globalConfig.enableMetaSync; + tableConfig.schemaProviderClassName = globalConfig.schemaProviderClassName; + tableConfig.sourceOrderingField = globalConfig.sourceOrderingField; + tableConfig.sourceClassName = globalConfig.sourceClassName; + tableConfig.tableType = globalConfig.tableType; + tableConfig.targetTableName = globalConfig.targetTableName; + tableConfig.operation = globalConfig.operation; + tableConfig.sourceLimit = globalConfig.sourceLimit; + tableConfig.checkpoint = globalConfig.checkpoint; + tableConfig.continuousMode = globalConfig.continuousMode; + tableConfig.filterDupes = globalConfig.filterDupes; + tableConfig.payloadClassName = globalConfig.payloadClassName; + tableConfig.forceDisableCompaction = globalConfig.forceDisableCompaction; + tableConfig.maxPendingCompactions = globalConfig.maxPendingCompactions; + tableConfig.maxPendingClustering = globalConfig.maxPendingClustering; + tableConfig.minSyncIntervalSeconds = globalConfig.minSyncIntervalSeconds; + tableConfig.transformerClassNames = globalConfig.transformerClassNames; + tableConfig.commitOnErrors = globalConfig.commitOnErrors; + tableConfig.compactSchedulingMinShare = globalConfig.compactSchedulingMinShare; + tableConfig.compactSchedulingWeight = globalConfig.compactSchedulingWeight; + tableConfig.deltaSyncSchedulingMinShare = globalConfig.deltaSyncSchedulingMinShare; + tableConfig.deltaSyncSchedulingWeight = globalConfig.deltaSyncSchedulingWeight; + tableConfig.sparkMaster = globalConfig.sparkMaster; + tableConfig.workloadDagGenerator = 
globalConfig.workloadDagGenerator; + tableConfig.outputTypeName = globalConfig.outputTypeName; + tableConfig.inputFormatName = globalConfig.inputFormatName; + tableConfig.inputParallelism = globalConfig.inputParallelism; + tableConfig.useDeltaStreamer = globalConfig.useDeltaStreamer; + tableConfig.cleanInput = globalConfig.cleanInput; + tableConfig.cleanOutput = globalConfig.cleanOutput; + tableConfig.targetBasePath = globalConfig.targetBasePath; + } + + public static class HoodieMultiWriterTestSuiteConfig extends HoodieTestSuiteJob.HoodieTestSuiteConfig { + + @Parameter(names = {"--input-base-paths"}, description = "base paths for input data" + + "(Will be created if did not exist first time around. If exists, more data will be added to that path)", + required = true) + public String inputBasePaths; + + @Parameter(names = { + "--workload-yaml-paths"}, description = "Workflow Dag yaml path to generate the workload") + public String workloadYamlPaths; + + @Parameter(names = { + "--props-paths"}, description = "Workflow Dag yaml path to generate the workload") + public String propsFilePaths; + } +} diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.java index fe81f0c075c99..2d9f841ae351c 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.java @@ -18,7 +18,6 @@ package org.apache.hudi.integ.testsuite; -import org.apache.avro.Schema; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -48,6 +47,7 @@ import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; +import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.fs.Path; @@ -93,13 +93,19 @@ public class HoodieTestSuiteJob { */ private transient HiveConf hiveConf; + private boolean stopJsc = true; private BuiltinKeyGenerator keyGenerator; private transient HoodieTableMetaClient metaClient; public HoodieTestSuiteJob(HoodieTestSuiteConfig cfg, JavaSparkContext jsc) throws IOException { + this(cfg, jsc, true); + } + + public HoodieTestSuiteJob(HoodieTestSuiteConfig cfg, JavaSparkContext jsc, boolean stopJsc) throws IOException { log.warn("Running spark job w/ app id " + jsc.sc().applicationId()); this.cfg = cfg; this.jsc = jsc; + this.stopJsc = stopJsc; cfg.propsFilePath = FSUtils.addSchemeIfLocalPath(cfg.propsFilePath).toString(); this.sparkSession = SparkSession.builder().config(jsc.getConf()).enableHiveSupport().getOrCreate(); this.fs = FSUtils.getFs(cfg.inputBasePath, jsc.hadoopConfiguration()); @@ -108,11 +114,15 @@ public HoodieTestSuiteJob(HoodieTestSuiteConfig cfg, JavaSparkContext jsc) throw this.hiveConf = getDefaultHiveConf(jsc.hadoopConfiguration()); this.keyGenerator = (BuiltinKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(props); - metaClient = HoodieTableMetaClient.withPropertyBuilder() - .setTableType(cfg.tableType) - .setTableName(cfg.targetTableName) - .setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue()) - .initTable(jsc.hadoopConfiguration(), cfg.targetBasePath); + if (!fs.exists(new Path(cfg.targetBasePath))) { + metaClient = HoodieTableMetaClient.withPropertyBuilder() + .setTableType(cfg.tableType) + .setTableName(cfg.targetTableName) + .setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue()) + .initTable(jsc.hadoopConfiguration(), cfg.targetBasePath); + } else { + metaClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(cfg.targetBasePath).build(); + } if (cfg.cleanInput) { Path inputPath = new Path(cfg.inputBasePath); @@ -167,15 +177,15 @@ public static void main(String[] args) throws Exception { JavaSparkContext jssc = 
UtilHelpers.buildSparkContext("workload-generator-" + cfg.outputTypeName + "-" + cfg.inputFormatName, cfg.sparkMaster); - new HoodieTestSuiteJob(cfg, jssc).runTestSuite(); + new HoodieTestSuiteJob(cfg, jssc, true).runTestSuite(); } public WorkflowDag createWorkflowDag() throws IOException { WorkflowDag workflowDag = this.cfg.workloadYamlPath == null ? ((WorkflowDagGenerator) ReflectionUtils .loadClass((this.cfg).workloadDagGenerator)).build() : DagUtils.convertYamlPathToDag( - FSUtils.getFs(this.cfg.workloadYamlPath, jsc.hadoopConfiguration(), true), - this.cfg.workloadYamlPath); + FSUtils.getFs(this.cfg.workloadYamlPath, jsc.hadoopConfiguration(), true), + this.cfg.workloadYamlPath); return workflowDag; } @@ -207,11 +217,13 @@ public void runTestSuite() { log.error("Failed to run Test Suite ", e); throw new HoodieException("Failed to run Test Suite ", e); } finally { - stopQuietly(); + if (stopJsc) { + stopQuietly(); + } } } - private void stopQuietly() { + protected void stopQuietly() { try { sparkSession.stop(); jsc.stop(); @@ -295,5 +307,8 @@ public static class HoodieTestSuiteConfig extends HoodieDeltaStreamer.Config { @Parameter(names = {"--start-hive-metastore"}, description = "Start Hive Metastore to use for optimistic lock ") public Boolean startHiveMetastore = false; + + @Parameter(names = {"--use-hudi-data-to-generate-updates"}, description = "Use data from hudi to generate updates for new batches ") + public Boolean useHudiToGenerateUpdates = false; } } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngestTool.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngestTool.java new file mode 100644 index 0000000000000..c4f782fe40864 --- /dev/null +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngestTool.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.integ.testsuite; + +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.integ.testsuite.SparkDataSourceContinuousIngest; +import org.apache.hudi.utilities.HoodieRepairTool; +import org.apache.hudi.utilities.IdentitySplitter; +import org.apache.hudi.utilities.UtilHelpers; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Sample command + * + * ./bin/spark-submit --packages org.apache.spark:spark-avro_2.11:2.4.4 --driver-memory 4g --executor-memory 4g \ + * --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.catalogImplementation=hive \ + * --class org.apache.hudi.integ.testsuite.SparkDSContinuousIngestTool \ + * 
${HUDI_ROOT_DIR}/packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.11.0-SNAPSHOT.jar \ + * --source-path file:${SOURCE_DIR}/spark_ds_continuous --checkpoint-file-path /tmp/hudi/checkpoint \ + * --base-path file:///tmp/hudi/tbl_path/ --props /tmp/hudi_props.out + * + * Contents of hudi.properties + * + * hoodie.insert.shuffle.parallelism=4 + * hoodie.upsert.shuffle.parallelism=4 + * hoodie.bulkinsert.shuffle.parallelism=4 + * hoodie.delete.shuffle.parallelism=4 + * hoodie.datasource.write.recordkey.field=VendorID + * hoodie.datasource.write.partitionpath.field=date_col + * hoodie.datasource.write.operation=upsert + * hoodie.datasource.write.precombine.field=tpep_pickup_datetime + * hoodie.metadata.enable=false + * hoodie.table.name=hudi_tbl + */ + +public class SparkDataSourceContinuousIngestTool { + + private static final Logger LOG = LogManager.getLogger(SparkDataSourceContinuousIngestTool.class); + + private final Config cfg; + // Properties with source, hoodie client, key generator etc. + private TypedProperties props; + private HoodieSparkEngineContext context; + private SparkSession sparkSession; + + public SparkDataSourceContinuousIngestTool(JavaSparkContext jsc, Config cfg) { + if (cfg.propsFilePath != null) { + cfg.propsFilePath = FSUtils.addSchemeIfLocalPath(cfg.propsFilePath).toString(); + } + this.context = new HoodieSparkEngineContext(jsc); + this.sparkSession = SparkSession.builder().config(jsc.getConf()).getOrCreate(); + this.cfg = cfg; + this.props = cfg.propsFilePath == null + ? 
UtilHelpers.buildProperties(cfg.configs) + : readConfigFromFileSystem(jsc, cfg); + } + + public static void main(String[] args) { + final Config cfg = new Config(); + JCommander cmd = new JCommander(cfg, null, args); + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + final JavaSparkContext jsc = UtilHelpers.buildSparkContext("spark-datasource-continuous-ingestion-tool", cfg.sparkMaster, cfg.sparkMemory); + try { + new SparkDataSourceContinuousIngestTool(jsc, cfg).run(); + } catch (Throwable throwable) { + LOG.error("Fail to run Continuous Ingestion for spark datasource " + cfg.basePath, throwable); + } finally { + jsc.stop(); + } + } + + public void run() { + try { + SparkDataSourceContinuousIngest sparkDataSourceContinuousIngest = + new SparkDataSourceContinuousIngest(sparkSession, context.getHadoopConf().get(), new Path(cfg.sourcePath), cfg.sparkFormat, + new Path(cfg.checkpointFilePath), new Path(cfg.basePath), getPropsAsMap(props), + cfg.minSyncIntervalSeconds); + sparkDataSourceContinuousIngest.startIngestion(); + } finally { + sparkSession.stop(); + context.getJavaSparkContext().stop(); + } + } + + private Map getPropsAsMap(TypedProperties typedProperties) { + Map props = new HashMap<>(); + typedProperties.entrySet().forEach(entry -> props.put(entry.getKey().toString(), entry.getValue().toString())); + return props; + } + + /** + * Reads config from the file system. + * + * @param jsc {@link JavaSparkContext} instance. + * @param cfg {@link HoodieRepairTool.Config} instance. + * @return the {@link TypedProperties} instance. 
+ */ + private TypedProperties readConfigFromFileSystem(JavaSparkContext jsc, Config cfg) { + return UtilHelpers.readConfig(jsc.hadoopConfiguration(), new Path(cfg.propsFilePath), cfg.configs) + .getProps(true); + } + + public static class Config implements Serializable { + @Parameter(names = {"--source-path", "-sp"}, description = "Source path for the parquet data to consume", required = true) + public String sourcePath = null; + @Parameter(names = {"--source-format", "-sf"}, description = "source data format", required = false) + public String sparkFormat = "parquet"; + @Parameter(names = {"--checkpoint-file-path", "-cpf"}, description = "Checkpoint file path to store/fetch checkpointing info", required = true) + public String checkpointFilePath = null; + @Parameter(names = {"--base-path", "-bp"}, description = "Base path for the hudi table", required = true) + public String basePath = null; + @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false) + public String sparkMaster = null; + @Parameter(names = {"--spark-memory", "-sm"}, description = "spark memory to use", required = false) + public String sparkMemory = "1g"; + @Parameter(names = {"--min-sync-interval-seconds"}, + description = "the min sync interval of each sync in continuous mode") + public Integer minSyncIntervalSeconds = 0; + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + + @Parameter(names = {"--props"}, description = "path to properties file on localfs or dfs, with configurations for " + + "hoodie client for table repair") + public String propsFilePath = null; + + @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file " + + "(using the CLI parameter \"--props\") can also be passed command line using this parameter. 
This can be repeated", + splitter = IdentitySplitter.class) + public List configs = new ArrayList<>(); + } +} diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/configuration/DFSDeltaConfig.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/configuration/DFSDeltaConfig.java index 0ac36687f485c..f6c8c8fc306b0 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/configuration/DFSDeltaConfig.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/configuration/DFSDeltaConfig.java @@ -40,18 +40,20 @@ public class DFSDeltaConfig extends DeltaConfig { private int inputParallelism; // Whether to delete older input data once it has been ingested private boolean deleteOldInputData; + private boolean useHudiToGenerateUpdates; public DFSDeltaConfig(DeltaOutputMode deltaOutputMode, DeltaInputType deltaInputType, SerializableConfiguration configuration, String deltaBasePath, String targetBasePath, String schemaStr, Long maxFileSize, - int inputParallelism, boolean deleteOldInputData) { - super(deltaOutputMode, deltaInputType, configuration); + int inputParallelism, boolean deleteOldInputData, boolean useHudiToGenerateUpdates) { + super(deltaOutputMode, deltaInputType, configuration); this.deltaBasePath = deltaBasePath; this.schemaStr = schemaStr; this.maxFileSize = maxFileSize; this.datasetOutputPath = targetBasePath; this.inputParallelism = inputParallelism; this.deleteOldInputData = deleteOldInputData; + this.useHudiToGenerateUpdates = useHudiToGenerateUpdates; } public String getDeltaBasePath() { @@ -85,4 +87,8 @@ public int getInputParallelism() { public boolean shouldDeleteOldInputData() { return deleteOldInputData; } + + public boolean shouldUseHudiToGenerateUpdates() { + return useHudiToGenerateUpdates; + } } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/WriterContext.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/WriterContext.java 
index 27760f7116ecd..d31ef195ecdd5 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/WriterContext.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/WriterContext.java @@ -72,7 +72,7 @@ public void initContext(JavaSparkContext jsc) throws HoodieException { this.deltaGenerator = new DeltaGenerator( new DFSDeltaConfig(DeltaOutputMode.valueOf(cfg.outputTypeName), DeltaInputType.valueOf(cfg.inputFormatName), new SerializableConfiguration(jsc.hadoopConfiguration()), cfg.inputBasePath, cfg.targetBasePath, - schemaStr, cfg.limitFileSize, inputParallelism, cfg.deleteOldInput), + schemaStr, cfg.limitFileSize, inputParallelism, cfg.deleteOldInput, cfg.useHudiToGenerateUpdates), jsc, sparkSession, schemaStr, keyGenerator); log.info(String.format("Initialized writerContext with: %s", schemaStr)); } catch (Exception e) { diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/DeleteNode.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/DeleteNode.java index b538b01d17496..8eaea65413709 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/DeleteNode.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/DeleteNode.java @@ -38,7 +38,7 @@ public DeleteNode(Config config) { @Override protected void generate(DeltaGenerator deltaGenerator) throws Exception { if (!config.isDisableGenerate()) { - deltaGenerator.writeRecords(deltaGenerator.generateDeletes(config)).count(); + deltaGenerator.writeRecords(deltaGenerator.generateDeletes(config)).getValue().count(); } } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/InsertNode.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/InsertNode.java index f5cf56b99c3ce..33cce79e0d1bf 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/InsertNode.java +++ 
b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/InsertNode.java @@ -59,7 +59,7 @@ public void execute(ExecutionContext executionContext, int curItrCount) throws E protected void generate(DeltaGenerator deltaGenerator) throws Exception { if (!config.isDisableGenerate()) { log.info("Generating input data for node {}", this.getName()); - this.deltaWriteStatsRDD = deltaGenerator.writeRecords(deltaGenerator.generateInserts(config)); + this.deltaWriteStatsRDD = deltaGenerator.writeRecords(deltaGenerator.generateInserts(config)).getValue(); this.deltaWriteStatsRDD.cache(); this.deltaWriteStatsRDD.count(); } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/UpsertNode.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/UpsertNode.java index 1377a4d6b80a2..427ee74b63882 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/UpsertNode.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/UpsertNode.java @@ -38,7 +38,7 @@ public UpsertNode(Config config) { protected void generate(DeltaGenerator deltaGenerator) throws Exception { if (!config.isDisableGenerate()) { log.info("Generating input data {}", this.getName()); - deltaGenerator.writeRecords(deltaGenerator.generateUpdates(config)).count(); + deltaGenerator.writeRecords(deltaGenerator.generateUpdates(config)).getValue().count(); } } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java index 69e32dfbc1182..e7bc7b00a82a4 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java @@ -18,24 +18,9 @@ package org.apache.hudi.integ.testsuite.generator; -import java.io.IOException; -import 
java.io.Serializable; -import java.io.UncheckedIOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.StreamSupport; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.integ.testsuite.configuration.DFSDeltaConfig; import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config; import org.apache.hudi.integ.testsuite.converter.Converter; @@ -51,6 +36,9 @@ import org.apache.hudi.integ.testsuite.writer.DeltaWriterFactory; import org.apache.hudi.keygen.BuiltinKeyGenerator; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; @@ -58,6 +46,20 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.io.Serializable; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; + import scala.Tuple2; /** @@ -85,7 +87,7 @@ public DeltaGenerator(DFSDeltaConfig deltaOutputConfig, JavaSparkContext jsc, Sp this.partitionPathFieldNames = keyGenerator.getPartitionPathFields(); } - public JavaRDD writeRecords(JavaRDD records) { + public Pair> writeRecords(JavaRDD records) { if 
(deltaOutputConfig.shouldDeleteOldInputData() && batchId > 1) { Path oldInputDir = new Path(deltaOutputConfig.getDeltaBasePath(), Integer.toString(batchId - 1)); try { @@ -107,7 +109,7 @@ public JavaRDD writeRecords(JavaRDD records) { } }).flatMap(List::iterator); batchId++; - return ws; + return Pair.of(batchId, ws); } public int getBatchId() { @@ -156,15 +158,22 @@ public JavaRDD generateUpdates(Config config) throws IOException adjustedRDD = deltaInputReader.read(config.getNumRecordsUpsert()); adjustedRDD = adjustRDDToGenerateExactNumUpdates(adjustedRDD, jsc, config.getNumRecordsUpsert()); } else { - deltaInputReader = - new DFSHoodieDatasetInputReader(jsc, ((DFSDeltaConfig) deltaOutputConfig).getDatasetOutputPath(), - schemaStr); - if (config.getFractionUpsertPerFile() > 0) { - adjustedRDD = deltaInputReader.read(config.getNumUpsertPartitions(), config.getNumUpsertFiles(), - config.getFractionUpsertPerFile()); + if (((DFSDeltaConfig) deltaOutputConfig).shouldUseHudiToGenerateUpdates()) { + deltaInputReader = + new DFSHoodieDatasetInputReader(jsc, ((DFSDeltaConfig) deltaOutputConfig).getDeltaBasePath(), + schemaStr); + if (config.getFractionUpsertPerFile() > 0) { + adjustedRDD = deltaInputReader.read(config.getNumUpsertPartitions(), config.getNumUpsertFiles(), + config.getFractionUpsertPerFile()); + } else { + adjustedRDD = deltaInputReader.read(config.getNumUpsertPartitions(), config.getNumUpsertFiles(), config + .getNumRecordsUpsert()); + } } else { - adjustedRDD = deltaInputReader.read(config.getNumUpsertPartitions(), config.getNumUpsertFiles(), config - .getNumRecordsUpsert()); + deltaInputReader = new DFSAvroDeltaInputReader(sparkSession, schemaStr, + ((DFSDeltaConfig) deltaOutputConfig).getDeltaBasePath(), Option.empty(), Option.empty()); + adjustedRDD = deltaInputReader.read(config.getNumRecordsUpsert()); + adjustedRDD = adjustRDDToGenerateExactNumUpdates(adjustedRDD, jsc, config.getNumRecordsUpsert()); } } diff --git 
a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngest.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngest.scala new file mode 100644 index 0000000000000..9ead7f290a06e --- /dev/null +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngest.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.integ.testsuite + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter} +import org.apache.log4j.LogManager +import org.apache.spark.sql.{SaveMode, SparkSession} + +import java.io.Serializable + +class SparkDataSourceContinuousIngest(val spark: SparkSession, val conf: Configuration, val sourcePath: Path, + val sourceFormat: String, val checkpointFile: Path, hudiBasePath: Path, hudiOptions: java.util.Map[String, String], + minSyncIntervalSeconds: Long) extends Serializable { + + private val log = LogManager.getLogger(getClass) + + def startIngestion(): Unit = { + val fs = sourcePath.getFileSystem(conf) + var checkPointFs = checkpointFile.getFileSystem(conf) + var orderedBatch : Array[FileStatus] = null + if (checkPointFs.exists(checkpointFile)) { + log.info("Checkpoint file exists. ") + val checkpoint = spark.sparkContext.textFile(checkpointFile.toString).collect()(0) + log.warn("Checkpoint to resume from " + checkpoint) + + orderedBatch = fetchListOfFilesToConsume(fs, sourcePath, new PathFilter { + override def accept(path: Path): Boolean = { + path.getName.toLong > checkpoint.toLong + } + }) + if (log.isDebugEnabled) { + log.debug("List of batches to consume in order ") + orderedBatch.foreach(entry => log.warn(" " + entry.getPath.getName)) + } + + } else { + log.warn("No checkpoint file exists. 
Starting from scratch ") + orderedBatch = fetchListOfFilesToConsume(fs, sourcePath, new PathFilter { + override def accept(path: Path): Boolean = { + true + } + }) + if (log.isDebugEnabled) { + log.debug("List of batches to consume in order ") + orderedBatch.foreach(entry => log.warn(" " + entry.getPath.getName)) + } + } + + orderedBatch.foreach(entry => { + log.info("Consuming from batch " + entry) + val pathToConsume = new Path(sourcePath.toString + "/" + entry.getPath.getName) + val df = spark.read.format(sourceFormat).load(pathToConsume.toString) + + df.write.format("hudi").options(hudiOptions).mode(SaveMode.Append).save(hudiBasePath.toString) + writeToFile(checkpointFile, entry.getPath.getName, checkPointFs) + log.info("Completed batch " + entry + ". Moving to next batch. Sleeping for " + minSyncIntervalSeconds + " secs before next batch") + Thread.sleep(minSyncIntervalSeconds * 1000) + }) + } + + def fetchListOfFilesToConsume(fs: FileSystem, basePath: Path, pathFilter: PathFilter): Array[FileStatus] = { + val nextBatches = fs.listStatus(basePath, pathFilter) + nextBatches.sortBy(fileStatus => fileStatus.getPath.getName.toLong) + } + + def writeToFile(checkpointFilePath: Path, str: String, fs: FileSystem): Unit = { + if (!fs.exists(checkpointFilePath)) { + fs.create(checkpointFilePath) + } + val fsOutStream = fs.create(checkpointFilePath, true) + fsOutStream.writeBytes(str) + fsOutStream.flush() + fsOutStream.close() + } +} diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkBulkInsertNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkBulkInsertNode.scala index 6654264a969c9..ac254bea8dad0 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkBulkInsertNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkBulkInsertNode.scala @@ -46,7 +46,7 @@ class SparkBulkInsertNode(dagNodeConfig: Config) extends 
DagNode[RDD[WriteStatus */ override def execute(context: ExecutionContext, curItrCount: Int): Unit = { if (!config.isDisableGenerate) { - context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateInserts(config)).count() + context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateInserts(config)).getValue().count() } val inputDF = AvroConversionUtils.createDataFrame(context.getWriterContext.getHoodieTestSuiteWriter.getNextBatch, context.getWriterContext.getHoodieTestSuiteWriter.getSchema, diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkDeleteNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkDeleteNode.scala index 645787a873e04..ecf94b94ec975 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkDeleteNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkDeleteNode.scala @@ -19,7 +19,6 @@ package org.apache.hudi.integ.testsuite.dag.nodes import org.apache.avro.Schema -import org.apache.avro.generic.GenericRecord import org.apache.hudi.client.WriteStatus import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config @@ -51,39 +50,26 @@ class SparkDeleteNode(dagNodeConfig: Config) extends DagNode[RDD[WriteStatus]] { override def execute(context: ExecutionContext, curItrCount: Int): Unit = { // Deletes can't be fetched using getNextBatch() bcoz, getInsert(schema) from payload will return empty for delete // records - val genRecsRDD = generateRecordsForDelete(config, context) + + val batchIdRecords = context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateDeletes(config)) + batchIdRecords.getValue().count() + + val pathToRead = context.getWriterContext.getCfg.inputBasePath + "/" + batchIdRecords.getKey() + val avroDf = context.getWriterContext.getSparkSession.read.format("avro").load(pathToRead) + 
val genRecsRDD = HoodieSparkUtils.createRdd(avroDf, "testStructName", "testNamespace", false, + org.apache.hudi.common.util.Option.of(new Schema.Parser().parse(context.getWriterContext.getHoodieTestSuiteWriter.getSchema))) + val inputDF = AvroConversionUtils.createDataFrame(genRecsRDD, context.getWriterContext.getHoodieTestSuiteWriter.getSchema, context.getWriterContext.getSparkSession) + inputDF.write.format("hudi") .options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap)) .option(DataSourceWriteOptions.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) .option(DataSourceWriteOptions.TABLE_TYPE.key, context.getHoodieTestSuiteWriter.getCfg.tableType) .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key, "deltastreamer.checkpoint.key") - .option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse("")) .option(HoodieWriteConfig.TBL_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) .mode(SaveMode.Append) .save(context.getHoodieTestSuiteWriter.getWriteConfig.getBasePath) } - - /** - * Generates records for delete operations in Spark. - * - * @param config Node configs. - * @param context The context needed for an execution of a node. - * @return Records in {@link RDD}. 
- */ - private def generateRecordsForDelete(config: Config, context: ExecutionContext): RDD[GenericRecord] = { - if (!config.isDisableGenerate) { - context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateDeletes(config)).count() - } - - context.getWriterContext.getHoodieTestSuiteWriter.getNextBatchForDeletes() - val pathToRead = context.getWriterContext.getCfg.inputBasePath + "/" + context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse("") - - val avroDf = context.getWriterContext.getSparkSession.read.format("avro").load(pathToRead) - HoodieSparkUtils.createRdd(avroDf, "testStructName", "testNamespace", false, - org.apache.hudi.common.util.Option.of(new Schema.Parser().parse(context.getWriterContext.getHoodieTestSuiteWriter.getSchema))) - } } diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala index b0bec48a40e83..bea5ae3d6fdfc 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala @@ -18,11 +18,16 @@ package org.apache.hudi.integ.testsuite.dag.nodes +import org.apache.avro.Schema import org.apache.hudi.client.WriteStatus +import org.apache.hudi.common.util.collection.Pair import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config import org.apache.hudi.integ.testsuite.dag.ExecutionContext -import org.apache.hudi.{AvroConversionUtils, DataSourceWriteOptions} +import org.apache.hudi.integ.testsuite.writer.DeltaWriteStats +import org.apache.hudi.{AvroConversionUtils, DataSourceWriteOptions, HoodieSparkUtils} +import org.apache.log4j.LogManager +import org.apache.spark.api.java.JavaRDD import org.apache.spark.rdd.RDD import org.apache.spark.sql.SaveMode @@ -35,6 
+40,7 @@ import scala.collection.JavaConverters._ */ class SparkInsertNode(dagNodeConfig: Config) extends DagNode[RDD[WriteStatus]] { + private val log = LogManager.getLogger(getClass) config = dagNodeConfig /** @@ -45,21 +51,26 @@ class SparkInsertNode(dagNodeConfig: Config) extends DagNode[RDD[WriteStatus]] { * @throws Exception Thrown if the execution failed. */ override def execute(context: ExecutionContext, curItrCount: Int): Unit = { - if (!config.isDisableGenerate) { - println("Generating input data for node {}", this.getName) - writeRecords(context) - } - val inputDF = AvroConversionUtils.createDataFrame(context.getWriterContext.getHoodieTestSuiteWriter.getNextBatch, + println("Generating input data for node {}", this.getName) + + val batchIdRecords = writeRecords(context) + batchIdRecords.getValue().count() + + val pathToRead = context.getWriterContext.getCfg.inputBasePath + "/" + batchIdRecords.getKey() + val avroDf = context.getWriterContext.getSparkSession.read.format("avro").load(pathToRead) + val genRecsRDD = HoodieSparkUtils.createRdd(avroDf, "testStructName", "testNamespace", false, + org.apache.hudi.common.util.Option.of(new Schema.Parser().parse(context.getWriterContext.getHoodieTestSuiteWriter.getSchema))) + + val inputDF = AvroConversionUtils.createDataFrame(genRecsRDD, context.getWriterContext.getHoodieTestSuiteWriter.getSchema, context.getWriterContext.getSparkSession) + inputDF.write.format("hudi") .options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap)) .option(DataSourceWriteOptions.PRECOMBINE_FIELD.key(), "test_suite_source_ordering_field") .option(DataSourceWriteOptions.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) .option(DataSourceWriteOptions.TABLE_TYPE.key, context.getHoodieTestSuiteWriter.getCfg.tableType) .option(DataSourceWriteOptions.OPERATION.key, getOperation()) - .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key, "deltastreamer.checkpoint.key") - 
.option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse("")) .option(HoodieWriteConfig.TBL_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) .mode(SaveMode.Append) .save(context.getHoodieTestSuiteWriter.getWriteConfig.getBasePath) @@ -69,7 +80,7 @@ class SparkInsertNode(dagNodeConfig: Config) extends DagNode[RDD[WriteStatus]] { DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL } - def writeRecords(context: ExecutionContext): Unit = { - context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateInserts(config)).count() + def writeRecords(context: ExecutionContext): Pair[Integer, JavaRDD[DeltaWriteStats]] = { + context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateInserts(config)) } } diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala index f83bc55633db2..76e7576b176af 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala @@ -19,8 +19,12 @@ package org.apache.hudi.integ.testsuite.dag.nodes import org.apache.hudi.DataSourceWriteOptions +import org.apache.hudi.common.util.collection.Pair import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config import org.apache.hudi.integ.testsuite.dag.ExecutionContext +import org.apache.hudi.integ.testsuite.writer.DeltaWriteStats +import org.apache.log4j.LogManager +import org.apache.spark.api.java.JavaRDD /** * Spark datasource based upsert node @@ -29,11 +33,46 @@ import org.apache.hudi.integ.testsuite.dag.ExecutionContext */ class SparkUpsertNode(dagNodeConfig: Config) extends SparkInsertNode(dagNodeConfig) { + private val log = LogManager.getLogger(getClass) + override def getOperation(): String = { 
DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL } - override def writeRecords(context: ExecutionContext): Unit = { - context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateUpdates(config)).count() + override def writeRecords(context: ExecutionContext): Pair[Integer, JavaRDD[DeltaWriteStats]] = { + context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateUpdates(config)) } + + /** + * Execute the {@link DagNode}. + * + * @param context The context needed for an execution of a node. + * @param curItrCount iteration count for executing the node. + * @throws Exception Thrown if the execution failed. + */ + /*override def execute(context: ExecutionContext, curItrCount: Int): Unit = { + println("Generating input data for node {}", this.getName) + + val batchIdRecords = writeRecords(context) + batchIdRecords.getValue().count() + + val pathToRead = context.getWriterContext.getCfg.inputBasePath + "/" + batchIdRecords.getKey() + val avroDf = context.getWriterContext.getSparkSession.read.format("avro").load(pathToRead) + val genRecsRDD = HoodieSparkUtils.createRdd(avroDf, "testStructName", "testNamespace", false, + org.apache.hudi.common.util.Option.of(new Schema.Parser().parse(context.getWriterContext.getHoodieTestSuiteWriter.getSchema))) + + val inputDF = AvroConversionUtils.createDataFrame(genRecsRDD, + context.getWriterContext.getHoodieTestSuiteWriter.getSchema, + context.getWriterContext.getSparkSession) + + inputDF.write.format("hudi") + .options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap)) + .option(DataSourceWriteOptions.PRECOMBINE_FIELD.key(), "test_suite_source_ordering_field") + .option(DataSourceWriteOptions.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) + .option(DataSourceWriteOptions.TABLE_TYPE.key, context.getHoodieTestSuiteWriter.getCfg.tableType) + .option(DataSourceWriteOptions.OPERATION.key, getOperation()) + 
.option(HoodieWriteConfig.TBL_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) + .mode(SaveMode.Append) + .save(context.getHoodieTestSuiteWriter.getWriteConfig.getBasePath) + }*/ } diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/BaseSparkSqlNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/BaseSparkSqlNode.scala index ce6a40efbced0..83e5598d49378 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/BaseSparkSqlNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/BaseSparkSqlNode.scala @@ -57,7 +57,7 @@ abstract class BaseSparkSqlNode(dagNodeConfig: Config) extends DagNode[RDD[Write */ def prepareData(context: ExecutionContext): RDD[GenericRecord] = { if (!config.isDisableGenerate) { - context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateInserts(config)).count() + context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateInserts(config)).getValue().count() } context.getWriterContext.getHoodieTestSuiteWriter.getNextBatch } diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlCreateTableNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlCreateTableNode.scala index 3db6aa2ccf557..dabe54d822ba6 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlCreateTableNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlCreateTableNode.scala @@ -58,7 +58,7 @@ class SparkSqlCreateTableNode(dagNodeConfig: Config) extends DagNode[RDD[WriteSt if (config.shouldUseCtas) { // Prepares data for CTAS query if (!config.isDisableGenerate) { - context.getDeltaGenerator.writeRecords(context.getDeltaGenerator.generateInserts(config)).count() + 
context.getDeltaGenerator.writeRecords(context.getDeltaGenerator.generateInserts(config)).getValue().count() } val nextBatch = context.getWriterContext.getHoodieTestSuiteWriter.getNextBatch val sparkSession = context.getWriterContext.getSparkSession diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlDeleteNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlDeleteNode.scala index 847381f8cc588..645f2030b32e1 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlDeleteNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlDeleteNode.scala @@ -48,7 +48,7 @@ class SparkSqlDeleteNode(dagNodeConfig: Config) extends BaseSparkSqlNode(dagNode context.getWriterContext.getCfg.targetTableName, sparkSession.sparkContext.defaultParallelism) LOG.info("Number of records to delete: " + recordsToDelete.count()) // The update records corresponding to the SQL are only used for data validation - context.getDeltaGenerator().writeRecords(recordsToDelete).count() + context.getDeltaGenerator().writeRecords(recordsToDelete).getValue().count() recordsToDelete } diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlMergeNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlMergeNode.scala index b03230beb4cbc..52ba6be21ae23 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlMergeNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlMergeNode.scala @@ -42,7 +42,7 @@ class SparkSqlMergeNode(dagNodeConfig: Config) extends BaseSparkSqlNode(dagNodeC */ override def prepareData(context: ExecutionContext): RDD[GenericRecord] = { if (!config.isDisableGenerate) { - 
context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateUpdates(config)).count() + context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateUpdates(config)).getValue().count() } context.getWriterContext.getHoodieTestSuiteWriter.getNextBatch } diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlUpdateNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlUpdateNode.scala index fdc799feaddec..7405d3ff48cab 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlUpdateNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlUpdateNode.scala @@ -48,7 +48,7 @@ class SparkSqlUpdateNode(dagNodeConfig: Config) extends BaseSparkSqlNode(dagNode context.getWriterContext.getCfg.targetTableName, sparkSession.sparkContext.defaultParallelism) LOG.info("Number of records to update: " + recordsToUpdate.count()) // The update records corresponding to the SQL are only used for data validation - context.getDeltaGenerator().writeRecords(recordsToUpdate).count() + context.getDeltaGenerator().writeRecords(recordsToUpdate).getValue().count() recordsToUpdate } diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java index 4c0265ce90f64..db87f5dce0087 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java @@ -115,7 +115,7 @@ static String getSparkShellCommand(String commandFile) { .append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR) .append( " --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ") - .append(" --packages org.apache.spark:spark-avro_2.11:2.4.4 
").append(" -i ").append(commandFile).toString(); + .append(" -i ").append(commandFile).toString(); } static String getPrestoConsoleCommand(String commandFile) { @@ -223,7 +223,7 @@ private TestExecStartResultCallback executeCommandInDocker(String containerName, boolean completed = dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false).exec(callback) - .awaitCompletion(540, SECONDS); + .awaitCompletion(540, SECONDS); if (!completed) { callback.getStderr().flush(); callback.getStdout().flush(); diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/command/ITTestHoodieSyncCommand.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/command/ITTestHoodieSyncCommand.java index a6a4c3ec4201e..e6a4b6146273c 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/command/ITTestHoodieSyncCommand.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/command/ITTestHoodieSyncCommand.java @@ -60,7 +60,7 @@ public void testValidateSync() throws Exception { } private void syncHoodieTable(String hiveTableName, String op) throws Exception { - StringBuilder cmdBuilder = new StringBuilder("spark-submit --packages org.apache.spark:spark-avro_2.11:2.4.4 ") + StringBuilder cmdBuilder = new StringBuilder("spark-submit") .append(" --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer ").append(HUDI_UTILITIES_BUNDLE) .append(" --table-type COPY_ON_WRITE ") .append(" --base-file-format ").append(HoodieFileFormat.PARQUET.toString()) diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestDFSHoodieTestSuiteWriterAdapter.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestDFSHoodieTestSuiteWriterAdapter.java index ff92bd037d558..4a148da7954ac 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestDFSHoodieTestSuiteWriterAdapter.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestDFSHoodieTestSuiteWriterAdapter.java @@ 
-125,7 +125,7 @@ public void testDFSTwoFilesWriteWithRollover() throws IOException { public void testDFSWorkloadSinkWithMultipleFilesFunctional() throws IOException { DeltaConfig dfsSinkConfig = new DFSDeltaConfig(DeltaOutputMode.DFS, DeltaInputType.AVRO, new SerializableConfiguration(jsc.hadoopConfiguration()), dfsBasePath, dfsBasePath, - schemaProvider.getSourceSchema().toString(), 10240L, jsc.defaultParallelism(), false); + schemaProvider.getSourceSchema().toString(), 10240L, jsc.defaultParallelism(), false, false); DeltaWriterAdapter dfsDeltaWriterAdapter = DeltaWriterFactory .getDeltaWriterAdapter(dfsSinkConfig, 1); FlexibleSchemaRecordGenerationIterator itr = new FlexibleSchemaRecordGenerationIterator(1000, diff --git a/hudi-kafka-connect/pom.xml b/hudi-kafka-connect/pom.xml index 961855b034867..1bfb9765035e6 100644 --- a/hudi-kafka-connect/pom.xml +++ b/hudi-kafka-connect/pom.xml @@ -19,13 +19,13 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-kafka-connect Kafka Connect Sink Connector for Hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT jar diff --git a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/BufferedConnectWriter.java b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/BufferedConnectWriter.java index 0e92e674d42da..111c74f3a4883 100644 --- a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/BufferedConnectWriter.java +++ b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/BufferedConnectWriter.java @@ -96,9 +96,9 @@ public List flushRecords() { try { LOG.info("Number of entries in MemoryBasedMap => " + bufferedRecords.getInMemoryMapNumEntries() - + "Total size in bytes of MemoryBasedMap => " - + bufferedRecords.getCurrentInMemoryMapSize() + "Number of entries in BitCaskDiskMap => " - + bufferedRecords.getDiskBasedMapNumEntries() + "Size of file spilled to disk => " + + ", Total size in bytes of MemoryBasedMap => " + + bufferedRecords.getCurrentInMemoryMapSize() + ", 
Number of entries in BitCaskDiskMap => " + + bufferedRecords.getDiskBasedMapNumEntries() + ", Size of file spilled to disk => " + bufferedRecords.getSizeOfFileOnDiskInBytes()); List writeStatuses = new ArrayList<>(); diff --git a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java index 65a1d8ae2ddba..934dbadf1c750 100644 --- a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java +++ b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java @@ -163,7 +163,9 @@ private void syncMeta() { Arrays.asList(connectConfigs.getMetaSyncClasses().split(","))); FileSystem fs = FSUtils.getFs(tableBasePath, new Configuration()); for (String impl : syncClientToolClasses) { - SyncUtilHelpers.runHoodieMetaSync(impl.trim(), connectConfigs.getProps(), hadoopConf, fs, tableBasePath, HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT.defaultValue()); + // TODO kafka connect config needs to support setting base file format + String baseFileFormat = connectConfigs.getStringOrDefault(HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT); + SyncUtilHelpers.runHoodieMetaSync(impl.trim(), connectConfigs.getProps(), hadoopConf, fs, tableBasePath, baseFileFormat); } } } diff --git a/hudi-spark-datasource/hudi-spark-common/pom.xml b/hudi-spark-datasource/hudi-spark-common/pom.xml index 31ac4802505ef..e55ff5a7bfa72 100644 --- a/hudi-spark-datasource/hudi-spark-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark-common/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-spark-common_${scala.binary.version} - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-spark-common_${scala.binary.version} jar @@ -211,13 +211,6 @@ test - - - org.apache.spark - spark-avro_${scala.binary.version} - provided - - org.apache.hudi diff 
--git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncClusteringService.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncClusteringService.java index f87e16a652900..077b102a4a5c6 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncClusteringService.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncClusteringService.java @@ -22,6 +22,7 @@ import org.apache.hudi.client.BaseClusterer; import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.HoodieSparkClusteringClient; +import org.apache.hudi.common.engine.HoodieEngineContext; /** * Async clustering service for Spark structured streaming. @@ -31,8 +32,8 @@ public class SparkStreamingAsyncClusteringService extends AsyncClusteringService private static final long serialVersionUID = 1L; - public SparkStreamingAsyncClusteringService(BaseHoodieWriteClient writeClient) { - super(writeClient, true); + public SparkStreamingAsyncClusteringService(HoodieEngineContext context, BaseHoodieWriteClient writeClient) { + super(context, writeClient, true); } @Override diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/hudi-spark-datasource/hudi-spark-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index d751e13c771bf..cc8fb0492affd 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/hudi-spark-datasource/hudi-spark-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -16,4 +16,5 @@ # limitations under the License. 
-org.apache.hudi.DefaultSource \ No newline at end of file +org.apache.hudi.DefaultSource +org.apache.spark.sql.execution.datasources.parquet.SparkHoodieParquetFileFormat \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala index da4e8d30e206f..525292da6da98 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala @@ -20,10 +20,11 @@ package org.apache.hudi import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path + import org.apache.hudi.HoodieBaseRelation.createBaseFileReader import org.apache.hudi.common.table.HoodieTableMetaClient -import org.apache.spark.sql.{HoodieCatalystExpressionUtils, SQLContext} -import org.apache.spark.sql.catalyst.InternalRow + +import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.sources.{BaseRelation, Filter} @@ -52,11 +53,20 @@ class BaseFileOnlyRelation(sqlContext: SQLContext, override type FileSplit = HoodieBaseFileSplit + override lazy val mandatoryColumns: Seq[String] = + Seq(recordKeyField) + + override def imbueConfigs(sqlContext: SQLContext): Unit = { + super.imbueConfigs(sqlContext) + sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true") + } + protected override def composeRDD(fileSplits: Seq[HoodieBaseFileSplit], partitionSchema: StructType, tableSchema: HoodieTableSchema, requiredSchema: HoodieTableSchema, filters: Array[Filter]): HoodieUnsafeRDD = { + val baseFileReader = createBaseFileReader( spark = sparkSession, partitionSchema = partitionSchema, @@ -66,7 +76,7 @@ class 
BaseFileOnlyRelation(sqlContext: SQLContext, options = optParams, // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it // to configure Parquet reader appropriately - hadoopConf = new Configuration(conf) + hadoopConf = HoodieDataSourceHelper.getConfigurationWithInternalSchema(new Configuration(conf), requiredSchema.internalSchema, metaClient.getBasePath, validCommits) ) new HoodieFileScanRDD(sparkSession, baseFileReader, fileSplits) @@ -82,7 +92,7 @@ class BaseFileOnlyRelation(sqlContext: SQLContext, sparkSession = sparkSession, file = file, // TODO clarify why this is required - partitionValues = InternalRow.empty + partitionValues = getPartitionColumnsAsInternalRow(file) ) } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala new file mode 100644 index 0000000000000..74393234120e5 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala @@ -0,0 +1,295 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi + +import org.apache.avro.Schema.Parser +import org.apache.avro.generic.GenericRecord +import org.apache.hudi.ColumnStatsIndexSupport.{composeIndexSchema, deserialize, metadataRecordSchemaString, metadataRecordStructType, tryUnpackNonNullVal} +import org.apache.hudi.HoodieConversionUtils.toScalaOption +import org.apache.hudi.avro.model.HoodieMetadataRecord +import org.apache.hudi.client.common.HoodieSparkEngineContext +import org.apache.hudi.common.config.HoodieMetadataConfig +import org.apache.hudi.common.model.HoodieRecord +import org.apache.hudi.common.table.view.FileSystemViewStorageConfig +import org.apache.hudi.common.util.hash.ColumnIndexID +import org.apache.hudi.data.HoodieJavaRDD +import org.apache.hudi.metadata.{HoodieMetadataPayload, HoodieTableMetadata, HoodieTableMetadataUtil, MetadataPartitionType} +import org.apache.spark.api.java.JavaSparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{DataFrame, HoodieUnsafeRDDUtils, Row, SparkSession} + +import scala.collection.JavaConverters._ +import scala.collection.immutable.TreeSet + +/** + * Mixin trait abstracting away heavy-lifting of interactions with Metadata Table's Column Stats Index, + * providing convenient interfaces to read it, transpose, etc + */ +trait ColumnStatsIndexSupport extends SparkAdapterSupport { + + def readColumnStatsIndex(spark: SparkSession, + tableBasePath: String, + metadataConfig: HoodieMetadataConfig, + targetColumns: Seq[String] = Seq.empty): DataFrame = { + val targetColStatsIndexColumns = Seq( + HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME, + HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE, + HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE, + HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT, + 
HoodieMetadataPayload.COLUMN_STATS_FIELD_VALUE_COUNT) + + val requiredMetadataIndexColumns = + (targetColStatsIndexColumns :+ HoodieMetadataPayload.COLUMN_STATS_FIELD_COLUMN_NAME).map(colName => + s"${HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS}.${colName}") + + val metadataTableDF: DataFrame = { + // NOTE: If specific columns have been provided, we can considerably trim down amount of data fetched + // by only fetching Column Stats Index records pertaining to the requested columns. + // Otherwise we fallback to read whole Column Stats Index + if (targetColumns.nonEmpty) { + readColumnStatsIndexForColumnsInternal(spark, targetColumns, metadataConfig, tableBasePath) + } else { + readFullColumnStatsIndexInternal(spark, tableBasePath) + } + } + + val colStatsDF = metadataTableDF.where(col(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS).isNotNull) + .select(requiredMetadataIndexColumns.map(col): _*) + + colStatsDF + } + + /** + * Transposes and converts the raw table format of the Column Stats Index representation, + * where each row/record corresponds to individual (column, file) pair, into the table format + * where each row corresponds to single file with statistic for individual columns collated + * w/in such row: + * + * Metadata Table Column Stats Index format: + * + *

    +   *  +---------------------------+------------+------------+------------+-------------+
    +   *  |        fileName           | columnName |  minValue  |  maxValue  |  num_nulls  |
    +   *  +---------------------------+------------+------------+------------+-------------+
    +   *  | one_base_file.parquet     |          A |          1 |         10 |           0 |
    +   *  | another_base_file.parquet |          A |        -10 |          0 |           5 |
    +   *  +---------------------------+------------+------------+------------+-------------+
    +   * 
    + * + * Returned table format + * + *
    +   *  +---------------------------+------------+------------+-------------+
    +   *  |          file             | A_minValue | A_maxValue | A_nullCount |
    +   *  +---------------------------+------------+------------+-------------+
    +   *  | one_base_file.parquet     |          1 |         10 |           0 |
    +   *  | another_base_file.parquet |        -10 |          0 |           5 |
    +   *  +---------------------------+------------+------------+-------------+
    +   * 
    + * + * NOTE: Column Stats Index might potentially contain statistics for many columns (if not all), while + * query at hand might only be referencing a handful of those. As such, we collect all the + * column references from the filtering expressions, and only transpose records corresponding to the + * columns referenced in those + * + * @param spark Spark session ref + * @param colStatsDF [[DataFrame]] bearing raw Column Stats Index table + * @param targetColumns target columns to be included into the final table + * @param tableSchema schema of the source data table + * @return reshaped table according to the format outlined above + */ + def transposeColumnStatsIndex(spark: SparkSession, colStatsDF: DataFrame, targetColumns: Seq[String], tableSchema: StructType): DataFrame = { + val colStatsSchema = colStatsDF.schema + val colStatsSchemaOrdinalsMap = colStatsSchema.fields.zipWithIndex.map({ + case (field, ordinal) => (field.name, ordinal) + }).toMap + + val tableSchemaFieldMap = tableSchema.fields.map(f => (f.name, f)).toMap + + // NOTE: We're sorting the columns to make sure final index schema matches layout + // of the transposed table + val sortedColumns = TreeSet(targetColumns: _*) + + val colNameOrdinal = colStatsSchemaOrdinalsMap(HoodieMetadataPayload.COLUMN_STATS_FIELD_COLUMN_NAME) + val minValueOrdinal = colStatsSchemaOrdinalsMap(HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE) + val maxValueOrdinal = colStatsSchemaOrdinalsMap(HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE) + val fileNameOrdinal = colStatsSchemaOrdinalsMap(HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME) + val nullCountOrdinal = colStatsSchemaOrdinalsMap(HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT) + val valueCountOrdinal = colStatsSchemaOrdinalsMap(HoodieMetadataPayload.COLUMN_STATS_FIELD_VALUE_COUNT) + + val transposedRDD = colStatsDF.rdd + .filter(row => sortedColumns.contains(row.getString(colNameOrdinal))) + .map { row => + val (minValue, _) = 
tryUnpackNonNullVal(row.getAs[Row](minValueOrdinal)) + val (maxValue, _) = tryUnpackNonNullVal(row.getAs[Row](maxValueOrdinal)) + + val colName = row.getString(colNameOrdinal) + val colType = tableSchemaFieldMap(colName).dataType + + val rowValsSeq = row.toSeq.toArray + + rowValsSeq(minValueOrdinal) = deserialize(minValue, colType) + rowValsSeq(maxValueOrdinal) = deserialize(maxValue, colType) + + Row(rowValsSeq:_*) + } + .groupBy(r => r.getString(fileNameOrdinal)) + .foldByKey(Seq[Row]()) { + case (_, columnRows) => + // Rows seq is always non-empty (otherwise it won't be grouped into) + val fileName = columnRows.head.get(fileNameOrdinal) + val valueCount = columnRows.head.get(valueCountOrdinal) + + val coalescedRowValuesSeq = columnRows.toSeq + // NOTE: It's crucial to maintain appropriate ordering of the columns + // matching table layout + .sortBy(_.getString(colNameOrdinal)) + .foldLeft(Seq[Any](fileName, valueCount)) { + case (acc, columnRow) => + acc ++ Seq(minValueOrdinal, maxValueOrdinal, nullCountOrdinal).map(ord => columnRow.get(ord)) + } + + Seq(Row(coalescedRowValuesSeq:_*)) + } + .values + .flatMap(it => it) + + // NOTE: It's crucial to maintain appropriate ordering of the columns + // matching table layout: hence, we cherry-pick individual columns + // instead of simply filtering in the ones we're interested in the schema + val indexSchema = composeIndexSchema(sortedColumns.toSeq, tableSchema) + + spark.createDataFrame(transposedRDD, indexSchema) + } + + private def readFullColumnStatsIndexInternal(spark: SparkSession, tableBasePath: String) = { + val metadataTablePath = HoodieTableMetadata.getMetadataTableBasePath(tableBasePath) + // Read Metadata Table's Column Stats Index into Spark's [[DataFrame]] + spark.read.format("org.apache.hudi") + .load(s"$metadataTablePath/${MetadataPartitionType.COLUMN_STATS.getPartitionPath}") + } + + private def readColumnStatsIndexForColumnsInternal(spark: SparkSession, targetColumns: Seq[String], metadataConfig: 
HoodieMetadataConfig, tableBasePath: String) = { + val ctx = new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext)) + + // Read Metadata Table's Column Stats Index into Spark's [[DataFrame]] by + // - Fetching the records from CSI by key-prefixes (encoded column names) + // - Deserializing fetched records into [[InternalRow]]s + // - Composing [[DataFrame]] + val metadataTableDF = { + val metadataTable = HoodieTableMetadata.create(ctx, metadataConfig, tableBasePath, FileSystemViewStorageConfig.SPILLABLE_DIR.defaultValue) + + // TODO encoding should be done internally w/in HoodieBackedTableMetadata + val encodedTargetColumnNames = targetColumns.map(colName => new ColumnIndexID(colName).asBase64EncodedString()) + + val recordsRDD: RDD[HoodieRecord[HoodieMetadataPayload]] = + HoodieJavaRDD.getJavaRDD( + metadataTable.getRecordsByKeyPrefixes(encodedTargetColumnNames.asJava, HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS) + ) + + val catalystRowsRDD: RDD[InternalRow] = recordsRDD.mapPartitions { it => + val metadataRecordSchema = new Parser().parse(metadataRecordSchemaString) + val converter = AvroConversionUtils.createAvroToInternalRowConverter(metadataRecordSchema, metadataRecordStructType) + + it.map { record => + // schema and props are ignored for generating metadata record from the payload + // instead, the underlying file system, or bloom filter, or columns stats metadata (part of payload) are directly used + toScalaOption(record.getData.getInsertValue(null, null)) + .flatMap(avroRecord => converter(avroRecord.asInstanceOf[GenericRecord])) + .orNull + } + } + + HoodieUnsafeRDDUtils.createDataFrame(spark, catalystRowsRDD, metadataRecordStructType) + } + metadataTableDF + } +} + +object ColumnStatsIndexSupport { + + private val metadataRecordSchemaString: String = HoodieMetadataRecord.SCHEMA$.toString + private val metadataRecordStructType: StructType = AvroConversionUtils.convertAvroSchemaToStructType(HoodieMetadataRecord.SCHEMA$) + + /** + * 
@VisibleForTesting + */ + def composeIndexSchema(targetColumnNames: Seq[String], tableSchema: StructType): StructType = { + val fileNameField = StructField(HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME, StringType, nullable = true, Metadata.empty) + val valueCountField = StructField(HoodieMetadataPayload.COLUMN_STATS_FIELD_VALUE_COUNT, LongType, nullable = true, Metadata.empty) + + val targetFields = targetColumnNames.map(colName => tableSchema.fields.find(f => f.name == colName).get) + + StructType( + targetFields.foldLeft(Seq(fileNameField, valueCountField)) { + case (acc, field) => + acc ++ Seq( + composeColumnStatStructType(field.name, HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE, field.dataType), + composeColumnStatStructType(field.name, HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE, field.dataType), + composeColumnStatStructType(field.name, HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT, LongType)) + } + ) + } + + @inline def getMinColumnNameFor(colName: String): String = + formatColName(colName, HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE) + + @inline def getMaxColumnNameFor(colName: String): String = + formatColName(colName, HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE) + + @inline def getNullCountColumnNameFor(colName: String): String = + formatColName(colName, HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT) + + @inline def getValueCountColumnNameFor: String = + HoodieMetadataPayload.COLUMN_STATS_FIELD_VALUE_COUNT + + @inline private def formatColName(col: String, statName: String) = { // TODO add escaping for + String.format("%s_%s", col, statName) + } + + @inline private def composeColumnStatStructType(col: String, statName: String, dataType: DataType) = + StructField(formatColName(col, statName), dataType, nullable = true, Metadata.empty) + + private def tryUnpackNonNullVal(statStruct: Row): (Any, Int) = + statStruct.toSeq.zipWithIndex + .find(_._1 != null) + // NOTE: First non-null value will be a wrapper 
(converted into Row), bearing a single + // value + .map { case (value, ord) => (value.asInstanceOf[Row].get(0), ord)} + .getOrElse((null, -1)) + + private def deserialize(value: Any, dataType: DataType): Any = { + dataType match { + // NOTE: Since we can't rely on Avro's "date", and "timestamp-micros" logical-types, we're + // manually encoding corresponding values as int and long w/in the Column Stats Index and + // here we have to decode those back into corresponding logical representation. + case TimestampType => DateTimeUtils.toJavaTimestamp(value.asInstanceOf[Long]) + case DateType => DateTimeUtils.toJavaDate(value.asInstanceOf[Int]) + + // NOTE: All integral types of size less than Int are encoded as Ints in MT + case ShortType => value.asInstanceOf[Int].toShort + case ByteType => value.asInstanceOf[Int].toByte + + case _ => value + } + } +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala index f86e55b436f1e..432988962dea1 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala @@ -471,11 +471,10 @@ object DataSourceWriteOptions { .sinceVersion("0.9.0") .withDocumentation("This class is used by kafka client to deserialize the records") - val DROP_PARTITION_COLUMNS: ConfigProperty[String] = ConfigProperty - .key("hoodie.datasource.write.drop.partition.columns") - .defaultValue("false") - .withDocumentation("When set to true, will not write the partition columns into hudi. 
" + - "By default, false.") + val DROP_PARTITION_COLUMNS: ConfigProperty[Boolean] = ConfigProperty + .key(HoodieTableConfig.DROP_PARTITION_COLUMNS.key()) + .defaultValue(HoodieTableConfig.DROP_PARTITION_COLUMNS.defaultValue().booleanValue()) + .withDocumentation(HoodieTableConfig.DROP_PARTITION_COLUMNS.doc()) /** @deprecated Use {@link HIVE_ASSUME_DATE_PARTITION} and its methods instead */ @Deprecated diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala index 65dbdee127902..7550ff13fd5ea 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala @@ -21,12 +21,12 @@ import org.apache.hadoop.fs.Path import org.apache.hudi.DataSourceReadOptions._ import org.apache.hudi.DataSourceWriteOptions.{BOOTSTRAP_OPERATION_OPT_VAL, OPERATION} import org.apache.hudi.common.fs.FSUtils -import org.apache.hudi.common.model.HoodieRecord +import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord} import org.apache.hudi.common.model.HoodieTableType.{COPY_ON_WRITE, MERGE_ON_READ} +import org.apache.hudi.common.table.timeline.HoodieInstant import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.exception.HoodieException import org.apache.log4j.LogManager -import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.hudi.streaming.HoodieStreamSource import org.apache.spark.sql.sources._ @@ -46,6 +46,7 @@ class DefaultSource extends RelationProvider with DataSourceRegister with StreamSinkProvider with StreamSourceProvider + with SparkAdapterSupport with Serializable { SparkSession.getActiveSession.foreach { spark => @@ -108,7 +109,6 @@ class DefaultSource extends 
RelationProvider (COPY_ON_WRITE, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) | (MERGE_ON_READ, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) => new BaseFileOnlyRelation(sqlContext, metaClient, parameters, userSchema, globPaths) - case (COPY_ON_WRITE, QUERY_TYPE_INCREMENTAL_OPT_VAL, _) => new IncrementalRelation(sqlContext, parameters, userSchema, metaClient) @@ -128,6 +128,11 @@ class DefaultSource extends RelationProvider } } + def getValidCommits(metaClient: HoodieTableMetaClient): String = { + metaClient + .getCommitsAndCompactionTimeline.filterCompletedInstants.getInstants.toArray().map(_.asInstanceOf[HoodieInstant].getFileName).mkString(",") + } + /** * This DataSource API is used for writing the DataFrame at the destination. For now, we are returning a dummy * relation here because Spark does not really make use of the relation returned, and just returns an empty diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala index 11778da63db31..c33c6dce6d0cd 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala @@ -19,43 +19,57 @@ package org.apache.hudi import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord + import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, Path, PathFilter} +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.hbase.io.hfile.CacheConfig import org.apache.hadoop.mapred.JobConf -import org.apache.hudi.HoodieBaseRelation.{getPartitionPath, isMetadataTable} + +import org.apache.hudi.HoodieBaseRelation.getPartitionPath import org.apache.hudi.HoodieConversionUtils.toScalaOption -import org.apache.hudi.common.config.SerializableConfiguration +import 
org.apache.hudi.common.config.{HoodieMetadataConfig, SerializableConfiguration} import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord} import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline} import org.apache.hudi.common.table.view.HoodieTableFileSystemView import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.util.StringUtils -import org.apache.hudi.hadoop.HoodieROTablePathFilter +import org.apache.hudi.common.util.ValidationUtils.checkState +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter import org.apache.hudi.io.storage.HoodieHFileReader -import org.apache.hudi.metadata.{HoodieMetadataPayload, HoodieTableMetadata} + +import org.apache.spark.TaskContext import org.apache.spark.execution.datasources.HoodieInMemoryFileIndex import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD -import org.apache.spark.sql.avro.SchemaConverters import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression} -import org.apache.spark.sql.execution.datasources.{FileStatusCache, PartitionDirectory, PartitionedFile} +import org.apache.spark.sql.execution.datasources.{FileStatusCache, PartitionedFile, PartitioningUtils} import org.apache.spark.sql.hudi.HoodieSqlCommonUtils import org.apache.spark.sql.sources.{BaseRelation, Filter, PrunedFilteredScan} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.{Row, SQLContext, SparkSession} +import org.apache.spark.unsafe.types.UTF8String + +import java.io.Closeable +import java.net.URI import scala.collection.JavaConverters._ import scala.util.Try +import scala.util.control.NonFatal trait HoodieFileSplit {} -case class 
HoodieTableSchema(structTypeSchema: StructType, avroSchemaStr: String) +case class HoodieTableSchema(structTypeSchema: StructType, avroSchemaStr: String, internalSchema: InternalSchema = InternalSchema.getEmptyInternalSchema) -case class HoodieTableState(recordKeyField: String, - preCombineFieldOpt: Option[String]) +case class HoodieTableState(tablePath: String, + latestCommitTimestamp: String, + recordKeyField: String, + preCombineFieldOpt: Option[String], + usesVirtualKeys: Boolean, + recordPayloadClassName: String, + metadataConfig: HoodieMetadataConfig) /** * Hoodie BaseRelation which extends [[PrunedFilteredScan]]. @@ -64,7 +78,7 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext, val metaClient: HoodieTableMetaClient, val optParams: Map[String, String], userSchema: Option[StructType]) - extends BaseRelation with PrunedFilteredScan with Logging { + extends BaseRelation with PrunedFilteredScan with Logging with SparkAdapterSupport { type FileSplit <: HoodieFileSplit @@ -79,34 +93,64 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext, protected lazy val basePath: String = metaClient.getBasePath - // If meta fields are enabled, always prefer key from the meta field as opposed to user-specified one - // NOTE: This is historical behavior which is preserved as is + // NOTE: Record key-field is assumed singular here due to the either of + // - In case Hudi's meta fields are enabled: record key will be pre-materialized (stored) as part + // of the record's payload (as part of the Hudi's metadata) + // - In case Hudi's meta fields are disabled (virtual keys): in that case record has to bear _single field_ + // identified as its (unique) primary key w/in its payload (this is a limitation of [[SimpleKeyGenerator]], + // which is the only [[KeyGenerator]] permitted for virtual-keys payloads) protected lazy val recordKeyField: String = - if (tableConfig.populateMetaFields()) HoodieRecord.RECORD_KEY_METADATA_FIELD - else 
tableConfig.getRecordKeyFieldProp + if (tableConfig.populateMetaFields()) { + HoodieRecord.RECORD_KEY_METADATA_FIELD + } else { + val keyFields = tableConfig.getRecordKeyFields.get() + checkState(keyFields.length == 1) + keyFields.head + } - protected lazy val preCombineFieldOpt: Option[String] = getPrecombineFieldProperty + protected lazy val preCombineFieldOpt: Option[String] = + Option(tableConfig.getPreCombineField) + .orElse(optParams.get(DataSourceWriteOptions.PRECOMBINE_FIELD.key)) match { + // NOTE: This is required to compensate for cases when empty string is used to stub + // property value to avoid it being set with the default value + // TODO(HUDI-3456) cleanup + case Some(f) if !StringUtils.isNullOrEmpty(f) => Some(f) + case _ => None + } protected lazy val specifiedQueryTimestamp: Option[String] = optParams.get(DataSourceReadOptions.TIME_TRAVEL_AS_OF_INSTANT.key) .map(HoodieSqlCommonUtils.formatQueryInstant) - protected lazy val tableAvroSchema: Schema = { + protected lazy val (tableAvroSchema: Schema, internalSchema: InternalSchema) = { val schemaUtil = new TableSchemaResolver(metaClient) - Try(schemaUtil.getTableAvroSchema).getOrElse( + val avroSchema = Try(schemaUtil.getTableAvroSchema).getOrElse( // If there is no commit in the table, we can't get the schema // t/h [[TableSchemaResolver]], fallback to the provided [[userSchema]] instead. 
userSchema match { - case Some(s) => SchemaConverters.toAvroType(s) + case Some(s) => sparkAdapter.getAvroSchemaConverters.toAvroType(s, nullable = false, "record") case _ => throw new IllegalArgumentException("User-provided schema is required in case the table is empty") } ) + // try to find internalSchema + val internalSchemaFromMeta = try { + schemaUtil.getTableInternalSchemaFromCommitMetadata.orElse(InternalSchema.getEmptyInternalSchema) + } catch { + case _: Exception => InternalSchema.getEmptyInternalSchema + } + (avroSchema, internalSchemaFromMeta) } protected val tableStructSchema: StructType = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema) protected val partitionColumns: Array[String] = tableConfig.getPartitionFields.orElse(Array.empty) + /** + * if true, need to deal with schema for creating file reader. + */ + protected val dropPartitionColumnsWhenWrite: Boolean = + metaClient.getTableConfig.isDropPartitionColumns && partitionColumns.nonEmpty + /** * NOTE: PLEASE READ THIS CAREFULLY * @@ -119,27 +163,26 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext, FileStatusCache.getOrCreate(sparkSession)) /** + * Columns that relation has to read from the storage to properly execute on its semantic: for ex, + * for Merge-on-Read tables key fields as well and pre-combine field comprise mandatory set of columns, + * meaning that regardless of whether this columns are being requested by the query they will be fetched + * regardless so that relation is able to combine records properly (if necessary) + * * @VisibleInTests */ - lazy val mandatoryColumns: Seq[String] = { - if (isMetadataTable(metaClient)) { - Seq(HoodieMetadataPayload.KEY_FIELD_NAME, HoodieMetadataPayload.SCHEMA_FIELD_NAME_TYPE) - } else { - // TODO this is MOR table requirement, not necessary for COW - Seq(recordKeyField) ++ preCombineFieldOpt.map(Seq(_)).getOrElse(Seq()) - } - } + val mandatoryColumns: Seq[String] protected def timeline: HoodieTimeline = // NOTE: We're 
including compaction here since it's not considering a "commit" operation metaClient.getCommitsAndCompactionTimeline.filterCompletedInstants + protected val validCommits = timeline.getInstants.toArray().map(_.asInstanceOf[HoodieInstant].getFileName).mkString(",") + protected def latestInstant: Option[HoodieInstant] = toScalaOption(timeline.lastInstant()) - protected def queryTimestamp: Option[String] = { - specifiedQueryTimestamp.orElse(toScalaOption(timeline.lastInstant()).map(i => i.getTimestamp)) - } + protected def queryTimestamp: Option[String] = + specifiedQueryTimestamp.orElse(toScalaOption(timeline.lastInstant()).map(_.getTimestamp)) override def schema: StructType = tableStructSchema @@ -169,22 +212,45 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext, // filtered out upstream val fetchedColumns: Array[String] = appendMandatoryColumns(requiredColumns) - val (requiredAvroSchema, requiredStructSchema) = - HoodieSparkUtils.getRequiredSchema(tableAvroSchema, fetchedColumns) + val (requiredAvroSchema, requiredStructSchema, requiredInternalSchema) = + HoodieSparkUtils.getRequiredSchema(tableAvroSchema, fetchedColumns, internalSchema) val filterExpressions = convertToExpressions(filters) val (partitionFilters, dataFilters) = filterExpressions.partition(isPartitionPredicate) val fileSplits = collectFileSplits(partitionFilters, dataFilters) - val partitionSchema = StructType(Nil) - val tableSchema = HoodieTableSchema(tableStructSchema, tableAvroSchema.toString) - val requiredSchema = HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString) + val partitionSchema = if (dropPartitionColumnsWhenWrite) { + // when hoodie.datasource.write.drop.partition.columns is true, partition columns can't be persisted in + // data files. 
+ StructType(partitionColumns.map(StructField(_, StringType))) + } else { + StructType(Nil) + } + val tableSchema = HoodieTableSchema(tableStructSchema, if (internalSchema.isEmptySchema) tableAvroSchema.toString else AvroInternalSchemaConverter.convert(internalSchema, tableAvroSchema.getName).toString, internalSchema) + val dataSchema = if (dropPartitionColumnsWhenWrite) { + val dataStructType = StructType(tableStructSchema.filterNot(f => partitionColumns.contains(f.name))) + HoodieTableSchema( + dataStructType, + sparkAdapter.getAvroSchemaConverters.toAvroType(dataStructType, nullable = false, "record").toString() + ) + } else { + tableSchema + } + val requiredSchema = if (dropPartitionColumnsWhenWrite) { + val requiredStructType = StructType(requiredStructSchema.filterNot(f => partitionColumns.contains(f.name))) + HoodieTableSchema( + requiredStructType, + sparkAdapter.getAvroSchemaConverters.toAvroType(requiredStructType, nullable = false, "record").toString() + ) + } else { + HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString, requiredInternalSchema) + } // Here we rely on a type erasure, to workaround inherited API restriction and pass [[RDD[InternalRow]]] back as [[RDD[Row]]] // Please check [[needConversion]] scala-doc for more details if (fileSplits.nonEmpty) - composeRDD(fileSplits, partitionSchema, tableSchema, requiredSchema, filters).asInstanceOf[RDD[Row]] + composeRDD(fileSplits, partitionSchema, dataSchema, requiredSchema, filters).asInstanceOf[RDD[Row]] else sparkSession.sparkContext.emptyRDD } @@ -254,26 +320,69 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext, } protected final def appendMandatoryColumns(requestedColumns: Array[String]): Array[String] = { - val missing = mandatoryColumns.filter(col => !requestedColumns.contains(col)) - requestedColumns ++ missing + if (dropPartitionColumnsWhenWrite) { + if (requestedColumns.isEmpty) { + mandatoryColumns.toArray + } else { + requestedColumns + } + } else { + val 
missing = mandatoryColumns.filter(col => !requestedColumns.contains(col)) + requestedColumns ++ missing + } } - private def getPrecombineFieldProperty: Option[String] = - Option(tableConfig.getPreCombineField) - .orElse(optParams.get(DataSourceWriteOptions.PRECOMBINE_FIELD.key)) match { - // NOTE: This is required to compensate for cases when empty string is used to stub - // property value to avoid it being set with the default value - // TODO(HUDI-3456) cleanup - case Some(f) if !StringUtils.isNullOrEmpty(f) => Some(f) - case _ => None - } + protected def getTableState: HoodieTableState = { + // Subset of the state of table's configuration as of at the time of the query + HoodieTableState( + tablePath = basePath, + latestCommitTimestamp = queryTimestamp.get, + recordKeyField = recordKeyField, + preCombineFieldOpt = preCombineFieldOpt, + usesVirtualKeys = !tableConfig.populateMetaFields(), + recordPayloadClassName = tableConfig.getPayloadClass, + metadataConfig = fileIndex.metadataConfig + ) + } - private def imbueConfigs(sqlContext: SQLContext): Unit = { + def imbueConfigs(sqlContext: SQLContext): Unit = { sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true") // TODO(HUDI-3639) vectorized reader has to be disabled to make sure MORIncrementalRelation is working properly sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") } + + /** + * For enable hoodie.datasource.write.drop.partition.columns, need to create an InternalRow on partition values + * and pass this reader on parquet file. So that, we can query the partition columns. 
+ */ + protected def getPartitionColumnsAsInternalRow(file: FileStatus): InternalRow = { + try { + val tableConfig = metaClient.getTableConfig + if (dropPartitionColumnsWhenWrite) { + val relativePath = new URI(metaClient.getBasePath).relativize(new URI(file.getPath.getParent.toString)).toString + val hiveStylePartitioningEnabled = tableConfig.getHiveStylePartitioningEnable.toBoolean + if (hiveStylePartitioningEnabled) { + val partitionSpec = PartitioningUtils.parsePathFragment(relativePath) + InternalRow.fromSeq(partitionColumns.map(partitionSpec(_)).map(UTF8String.fromString)) + } else { + if (partitionColumns.length == 1) { + InternalRow.fromSeq(Seq(UTF8String.fromString(relativePath))) + } else { + val parts = relativePath.split("/") + assert(parts.size == partitionColumns.length) + InternalRow.fromSeq(parts.map(UTF8String.fromString)) + } + } + } else { + InternalRow.empty + } + } catch { + case NonFatal(e) => + logWarning(s"Failed to get the right partition InternalRow for file : ${file.toString}") + InternalRow.empty + } + } } object HoodieBaseRelation { @@ -281,9 +390,6 @@ object HoodieBaseRelation { def getPartitionPath(fileStatus: FileStatus): Path = fileStatus.getPath.getParent - def isMetadataTable(metaClient: HoodieTableMetaClient): Boolean = - HoodieTableMetadata.isMetadataTable(metaClient.getBasePath) - /** * Returns file-reader routine accepting [[PartitionedFile]] and returning an [[Iterator]] * over [[InternalRow]] @@ -316,7 +422,13 @@ object HoodieBaseRelation { partitionedFile => { val extension = FSUtils.getFileExtension(partitionedFile.filePath) if (HoodieFileFormat.PARQUET.getFileExtension.equals(extension)) { - parquetReader.apply(partitionedFile) + val iter = parquetReader.apply(partitionedFile) + if (iter.isInstanceOf[Closeable]) { + // register a callback to close parquetReader which will be executed on task completion. + // when tasks finished, this method will be called, and release resources. 
+ Option(TaskContext.get()).foreach(_.addTaskCompletionListener[Unit](_ => iter.asInstanceOf[Closeable].close())) + } + iter } else if (HoodieFileFormat.HFILE.getFileExtension.equals(extension)) { hfileReader.apply(partitionedFile) } else { diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala index 0871487b5e8c6..02264bc4a62fb 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala @@ -20,6 +20,9 @@ package org.apache.hudi import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.FileStatus +import org.apache.hudi.client.utils.SparkInternalSchemaConverter +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.utils.SerDeHelper import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{PredicateHelper, SpecificInternalRow, UnsafeProjection} @@ -31,7 +34,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch import scala.collection.JavaConverters._ -object HoodieDataSourceHelper extends PredicateHelper { +object HoodieDataSourceHelper extends PredicateHelper with SparkAdapterSupport { /** @@ -46,7 +49,7 @@ object HoodieDataSourceHelper extends PredicateHelper { options: Map[String, String], hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { - val readParquetFile: PartitionedFile => Iterator[Any] = new ParquetFileFormat().buildReaderWithPartitionValues( + val readParquetFile: PartitionedFile => Iterator[Any] = sparkAdapter.createHoodieParquetFileFormat().get.buildReaderWithPartitionValues( sparkSession = sparkSession, dataSchema = dataSchema, partitionSchema = partitionSchema, @@ -65,28 +68,6 
@@ object HoodieDataSourceHelper extends PredicateHelper { } } - /** - * Convert [[InternalRow]] to [[SpecificInternalRow]]. - */ - def createInternalRowWithSchema( - row: InternalRow, - schema: StructType, - positions: Seq[Int]): InternalRow = { - val rowToReturn = new SpecificInternalRow(schema) - var curIndex = 0 - schema.zip(positions).foreach { case (field, pos) => - val curField = if (row.isNullAt(pos)) { - null - } else { - row.get(pos, field.dataType) - } - rowToReturn.update(curIndex, curField) - curIndex += 1 - } - rowToReturn - } - - def splitFiles( sparkSession: SparkSession, file: FileStatus, @@ -100,4 +81,19 @@ object HoodieDataSourceHelper extends PredicateHelper { } } + /** + * Set internalSchema evolution parameters to configuration. + * spark will broadcast them to each executor, we use those parameters to do schema evolution. + * + * @param conf hadoop conf. + * @param internalSchema internalschema for query. + * @param tablePath hoodie table base path. + * @param validCommits valid commits, using give validCommits to validate all legal histroy Schema files, and return the latest one. 
+ */ + def getConfigurationWithInternalSchema(conf: Configuration, internalSchema: InternalSchema, tablePath: String, validCommits: String): Configuration = { + conf.set(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA, SerDeHelper.toJson(internalSchema)) + conf.set(SparkInternalSchemaConverter.HOODIE_TABLE_PATH, tablePath) + conf.set(SparkInternalSchemaConverter.HOODIE_VALID_COMMITS_LIST, validCommits) + conf + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala index de863203d6d5e..0ea4d1cef2e04 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala @@ -19,24 +19,23 @@ package org.apache.hudi import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hudi.HoodieDatasetUtils.withPersistence -import org.apache.hudi.HoodieFileIndex.{collectReferencedColumns, getConfigProperties} +import org.apache.hudi.HoodieFileIndex.{DataSkippingFailureMode, collectReferencedColumns, getConfigProperties} import org.apache.hudi.common.config.{HoodieMetadataConfig, TypedProperties} import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.util.StringUtils -import org.apache.hudi.index.columnstats.ColumnStatsIndexHelper.{getMaxColumnNameFor, getMinColumnNameFor, getNumNullsColumnNameFor} +import org.apache.hudi.exception.HoodieException import org.apache.hudi.keygen.constant.KeyGeneratorOptions import org.apache.hudi.keygen.{TimestampBasedAvroKeyGenerator, TimestampBasedKeyGenerator} -import org.apache.hudi.metadata.{HoodieMetadataPayload, HoodieTableMetadata, MetadataPartitionType} +import org.apache.hudi.metadata.{HoodieMetadataPayload, HoodieTableMetadataUtil} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow 
import org.apache.spark.sql.catalyst.expressions.{And, Expression, Literal} import org.apache.spark.sql.execution.datasources.{FileIndex, FileStatusCache, NoopCache, PartitionDirectory} -import org.apache.spark.sql.functions.col import org.apache.spark.sql.hudi.DataSkippingUtils.translateIntoColumnStatsIndexFilterExpr import org.apache.spark.sql.hudi.HoodieSqlCommonUtils import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{StringType, StructType} -import org.apache.spark.sql.{AnalysisException, Column, SparkSession} +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{Column, DataFrame, SparkSession} import org.apache.spark.unsafe.types.UTF8String import java.text.SimpleDateFormat @@ -81,11 +80,12 @@ case class HoodieFileIndex(spark: SparkSession, specifiedQueryInstant = options.get(DataSourceReadOptions.TIME_TRAVEL_AS_OF_INSTANT.key).map(HoodieSqlCommonUtils.formatQueryInstant), fileStatusCache = fileStatusCache ) - with FileIndex { + with FileIndex + with ColumnStatsIndexSupport { override def rootPaths: Seq[Path] = queryPaths.asScala - def isDataSkippingEnabled(): Boolean = { + def isDataSkippingEnabled: Boolean = { options.getOrElse(DataSourceReadOptions.ENABLE_DATA_SKIPPING.key(), spark.sessionState.conf.getConfString(DataSourceReadOptions.ENABLE_DATA_SKIPPING.key(), "false")).toBoolean } @@ -124,8 +124,12 @@ case class HoodieFileIndex(spark: SparkSession, lookupCandidateFilesInMetadataTable(dataFilters) match { case Success(opt) => opt case Failure(e) => - logError("Failed to lookup candidate files in Z-index", e) - Option.empty + logError("Failed to lookup candidate files in File Index", e) + + spark.sqlContext.getConf(DataSkippingFailureMode.configName, DataSkippingFailureMode.Fallback.value) match { + case DataSkippingFailureMode.Fallback.value => Option.empty + case DataSkippingFailureMode.Strict.value => throw new HoodieException(e); + } } logDebug(s"Overlapping candidate files from Column Stats Index: 
${candidateFilesNamesOpt.getOrElse(Set.empty)}") @@ -192,67 +196,17 @@ case class HoodieFileIndex(spark: SparkSession, * @return list of pruned (data-skipped) candidate base-files' names */ private def lookupCandidateFilesInMetadataTable(queryFilters: Seq[Expression]): Try[Option[Set[String]]] = Try { - val fs = metaClient.getFs - val metadataTablePath = HoodieTableMetadata.getMetadataTableBasePath(basePath) - - if (!isDataSkippingEnabled() || !fs.exists(new Path(metadataTablePath)) || queryFilters.isEmpty) { + if (!isDataSkippingEnabled || queryFilters.isEmpty || !HoodieTableMetadataUtil.getCompletedMetadataPartitions(metaClient.getTableConfig) + .contains(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS)) { Option.empty } else { - val targetColStatsIndexColumns = Seq( - HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME, - HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE, - HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE, - HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT) - - val requiredMetadataIndexColumns = - (targetColStatsIndexColumns :+ HoodieMetadataPayload.COLUMN_STATS_FIELD_COLUMN_NAME).map(colName => - s"${HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS}.${colName}") - - // Read Metadata Table's Column Stats Index into Spark's [[DataFrame]] - val metadataTableDF = spark.read.format("org.apache.hudi") - .load(s"$metadataTablePath/${MetadataPartitionType.COLUMN_STATS.getPartitionPath}") - - // TODO filter on (column, partition) prefix - val colStatsDF = metadataTableDF.where(col(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS).isNotNull) - .select(requiredMetadataIndexColumns.map(col): _*) - val queryReferencedColumns = collectReferencedColumns(spark, queryFilters, schema) + val colStatsDF: DataFrame = readColumnStatsIndex(spark, basePath, metadataConfig, queryReferencedColumns) + // Persist DF to avoid re-computing column statistics unraveling withPersistence(colStatsDF) { - // Metadata Table bears rows in the following format - // - // 
+---------------------------+------------+------------+------------+-------------+ - // | fileName | columnName | minValue | maxValue | num_nulls | - // +---------------------------+------------+------------+------------+-------------+ - // | one_base_file.parquet | A | 1 | 10 | 0 | - // | another_base_file.parquet | A | -10 | 0 | 5 | - // +---------------------------+------------+------------+------------+-------------+ - // - // While Data Skipping utils are expecting following (transposed) format, where per-column stats are - // essentially transposed (from rows to columns): - // - // +---------------------------+------------+------------+-------------+ - // | file | A_minValue | A_maxValue | A_num_nulls | - // +---------------------------+------------+------------+-------------+ - // | one_base_file.parquet | 1 | 10 | 0 | - // | another_base_file.parquet | -10 | 0 | 5 | - // +---------------------------+------------+------------+-------------+ - // - // NOTE: Column Stats Index might potentially contain statistics for many columns (if not all), while - // query at hand might only be referencing a handful of those. 
As such, we collect all the - // column references from the filtering expressions, and only transpose records corresponding to the - // columns referenced in those - val transposedColStatsDF = - queryReferencedColumns.map(colName => - colStatsDF.filter(col(HoodieMetadataPayload.COLUMN_STATS_FIELD_COLUMN_NAME).equalTo(colName)) - .select(targetColStatsIndexColumns.map(col): _*) - .withColumnRenamed(HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT, getNumNullsColumnNameFor(colName)) - .withColumnRenamed(HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE, getMinColumnNameFor(colName)) - .withColumnRenamed(HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE, getMaxColumnNameFor(colName)) - ) - .reduceLeft((left, right) => - left.join(right, usingColumn = HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME)) + val transposedColStatsDF: DataFrame = transposeColumnStatsIndex(spark, colStatsDF, queryReferencedColumns, schema) // Persist DF to avoid re-computing column statistics unraveling withPersistence(transposedColStatsDF) { @@ -303,6 +257,22 @@ case class HoodieFileIndex(spark: SparkSession, object HoodieFileIndex extends Logging { + object DataSkippingFailureMode extends Enumeration { + val configName = "hoodie.fileIndex.dataSkippingFailureMode" + + type DataSkippingFailureMode = Value + + case class Val(value: String) extends super.Val { + override def toString(): String = value + } + + import scala.language.implicitConversions + implicit def valueToVal(x: Value): DataSkippingFailureMode = x.asInstanceOf[Val] + + val Fallback: Val = Val("fallback") + val Strict: Val = Val("strict") + } + private def collectReferencedColumns(spark: SparkSession, queryFilters: Seq[Expression], schema: StructType): Seq[String] = { val resolver = spark.sessionState.analyzer.resolver val refs = queryFilters.flatMap(_.references) @@ -318,7 +288,7 @@ object HoodieFileIndex extends Logging { properties.setProperty(HoodieMetadataConfig.ENABLE.key(), 
sqlConf.getConfString(HoodieMetadataConfig.ENABLE.key(), HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS.toString)) - properties.putAll(options.asJava) + properties.putAll(options.filter(p => p._2 != null).asJava) properties } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileScanRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileScanRDD.scala index cf68981d8318f..a176626f76421 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileScanRDD.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileScanRDD.scala @@ -20,64 +20,15 @@ package org.apache.hudi import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.QueryExecutionException -import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, SchemaColumnConvertNotSupportedException} -import org.apache.spark.{Partition, TaskContext} +import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, PartitionedFile} case class HoodieBaseFileSplit(filePartition: FilePartition) extends HoodieFileSplit -/** - * TODO eval if we actually need it - */ class HoodieFileScanRDD(@transient private val sparkSession: SparkSession, readFunction: PartitionedFile => Iterator[InternalRow], @transient fileSplits: Seq[HoodieBaseFileSplit]) - extends HoodieUnsafeRDD(sparkSession.sparkContext) { - - override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { - val iterator = new Iterator[InternalRow] with AutoCloseable { - private[this] val files = split.asInstanceOf[FilePartition].files.toIterator - private[this] var currentFile: PartitionedFile = _ - private[this] var currentIterator: Iterator[InternalRow] = _ - - override def hasNext: Boolean = { - (currentIterator != null && currentIterator.hasNext) || nextIterator() - } - - def 
next(): InternalRow = currentIterator.next() - - /** Advances to the next file. Returns true if a new non-empty iterator is available. */ - private def nextIterator(): Boolean = { - if (files.hasNext) { - currentFile = files.next() - logInfo(s"Reading File $currentFile") - currentIterator = readFunction(currentFile) - - try { - hasNext - } catch { - case e: SchemaColumnConvertNotSupportedException => - val message = "Parquet column cannot be converted in " + - s"file ${currentFile.filePath}. Column: ${e.getColumn}, " + - s"Expected: ${e.getLogicalType}, Found: ${e.getPhysicalType}" - throw new QueryExecutionException(message, e) - - case e => throw e - } - } else { - currentFile = null - false - } - } - - override def close(): Unit = {} - } - - // Register an on-task-completion callback to close the input stream. - context.addTaskCompletionListener[Unit](_ => iterator.close()) - - iterator.asInstanceOf[Iterator[InternalRow]] - } + extends FileScanRDD(sparkSession, readFunction, fileSplits.map(_.filePartition)) + with HoodieUnsafeRDD { - override protected def getPartitions: Array[Partition] = fileSplits.map(_.filePartition).toArray + override final def collect(): Array[InternalRow] = super[HoodieUnsafeRDD].collect() } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala index cc2915d605ff7..c0c47cff427c3 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala @@ -22,29 +22,36 @@ import org.apache.avro.Schema import org.apache.avro.generic.{GenericRecord, GenericRecordBuilder, IndexedRecord} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.hudi.HoodieDataSourceHelper._ -import 
org.apache.hudi.HoodieMergeOnReadRDD.resolveAvroSchemaNullability +import org.apache.hadoop.mapred.JobConf +import org.apache.hudi.HoodieConversionUtils.{toJavaOption, toScalaOption} +import org.apache.hudi.HoodieMergeOnReadRDD.{AvroDeserializerSupport, collectFieldOrdinals, getPartitionPath, projectAvro, projectAvroUnsafe, projectRowUnsafe, resolveAvroSchemaNullability} import org.apache.hudi.MergeOnReadSnapshotRelation.getFilePath -import org.apache.hudi.avro.HoodieAvroUtils import org.apache.hudi.common.config.HoodieMetadataConfig import org.apache.hudi.common.engine.HoodieLocalEngineContext import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath +import org.apache.hudi.common.model.{HoodieLogFile, HoodieRecord, HoodieRecordPayload, OverwriteWithLatestAvroPayload} import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner +import org.apache.hudi.common.util.ValidationUtils.checkState import org.apache.hudi.config.HoodiePayloadConfig import org.apache.hudi.exception.HoodieException import org.apache.hudi.hadoop.config.HoodieRealtimeConfig +import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes import org.apache.hudi.metadata.HoodieTableMetadata.getDataTableBasePathFromMetadataTable import org.apache.hudi.metadata.{HoodieBackedTableMetadata, HoodieTableMetadata} +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.avro.HoodieAvroDeserializer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.UnsafeProjection +import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeProjection} import org.apache.spark.sql.execution.datasources.PartitionedFile +import org.apache.spark.sql.types.StructType import org.apache.spark.{Partition, SerializableWritable, SparkContext, TaskContext} import java.io.Closeable import java.util.Properties +import 
scala.annotation.tailrec import scala.collection.JavaConverters._ -import scala.collection.mutable import scala.util.Try case class HoodieMergeOnReadPartition(index: Int, split: HoodieMergeOnReadFileSplit) extends Partition @@ -53,14 +60,16 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, @transient config: Configuration, fullSchemaFileReader: PartitionedFile => Iterator[InternalRow], requiredSchemaFileReader: PartitionedFile => Iterator[InternalRow], - tableState: HoodieTableState, tableSchema: HoodieTableSchema, requiredSchema: HoodieTableSchema, + tableState: HoodieTableState, + mergeType: String, @transient fileSplits: Seq[HoodieMergeOnReadFileSplit]) - extends HoodieUnsafeRDD(sc) { + extends RDD[InternalRow](sc, Nil) with HoodieUnsafeRDD { + + protected val maxCompactionMemoryInBytes: Long = getMaxCompactionMemoryInBytes(new JobConf(config)) private val confBroadcast = sc.broadcast(new SerializableWritable(config)) - private val recordKeyField = tableState.recordKeyField private val payloadProps = tableState.preCombineFieldOpt .map(preCombineField => HoodiePayloadConfig.newBuilder @@ -70,34 +79,59 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, ) .getOrElse(new Properties()) + private val whitelistedPayloadClasses: Set[String] = Seq( + classOf[OverwriteWithLatestAvroPayload] + ).map(_.getName).toSet + override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { val mergeOnReadPartition = split.asInstanceOf[HoodieMergeOnReadPartition] val iter = mergeOnReadPartition.split match { case dataFileOnlySplit if dataFileOnlySplit.logFiles.isEmpty => - requiredSchemaFileReader(dataFileOnlySplit.dataFile.get) + requiredSchemaFileReader.apply(dataFileOnlySplit.dataFile.get) + case logFileOnlySplit if logFileOnlySplit.dataFile.isEmpty => - logFileIterator(logFileOnlySplit, getConfig) - case skipMergeSplit if skipMergeSplit.mergeType.equals(DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL) => - 
skipMergeFileIterator(skipMergeSplit, requiredSchemaFileReader(skipMergeSplit.dataFile.get), getConfig) - case payloadCombineSplit - if payloadCombineSplit.mergeType.equals(DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL) => - payloadCombineFileIterator(payloadCombineSplit, fullSchemaFileReader(payloadCombineSplit.dataFile.get), - getConfig) + new LogFileIterator(logFileOnlySplit, getConfig) + + case split if mergeType.equals(DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL) => + val baseFileIterator = requiredSchemaFileReader.apply(split.dataFile.get) + new SkipMergeIterator(split, baseFileIterator, getConfig) + + case split if mergeType.equals(DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL) => + val (baseFileIterator, schema) = readBaseFile(split) + new RecordMergingFileIterator(split, baseFileIterator, schema, getConfig) + case _ => throw new HoodieException(s"Unable to select an Iterator to read the Hoodie MOR File Split for " + s"file path: ${mergeOnReadPartition.split.dataFile.get.filePath}" + s"log paths: ${mergeOnReadPartition.split.logFiles.toString}" + - s"hoodie table path: ${mergeOnReadPartition.split.tablePath}" + + s"hoodie table path: ${tableState.tablePath}" + s"spark partition Index: ${mergeOnReadPartition.index}" + - s"merge type: ${mergeOnReadPartition.split.mergeType}") + s"merge type: ${mergeType}") } + if (iter.isInstanceOf[Closeable]) { // register a callback to close logScanner which will be executed on task completion. // when tasks finished, this method will be called, and release resources. 
Option(TaskContext.get()).foreach(_.addTaskCompletionListener[Unit](_ => iter.asInstanceOf[Closeable].close())) } + iter } + private def readBaseFile(split: HoodieMergeOnReadFileSplit): (Iterator[InternalRow], HoodieTableSchema) = { + // NOTE: This is an optimization making sure that even for MOR tables we fetch absolute minimum + // of the stored data possible, while still properly executing corresponding relation's semantic + // and meet the query's requirements. + // + // Here we assume that iff queried table + // a) It does use one of the standard (and whitelisted) Record Payload classes + // then we can avoid reading and parsing the records w/ _full_ schema, and instead only + // rely on projected one, nevertheless being able to perform merging correctly + if (!whitelistedPayloadClasses.contains(tableState.recordPayloadClassName)) + (fullSchemaFileReader(split.dataFile.get), tableSchema) + else + (requiredSchemaFileReader(split.dataFile.get), requiredSchema) + } + override protected def getPartitions: Array[Partition] = fileSplits.zipWithIndex.map(file => HoodieMergeOnReadPartition(file._2, file._1)).toArray @@ -108,270 +142,303 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, } } - private def logFileIterator(split: HoodieMergeOnReadFileSplit, - config: Configuration): Iterator[InternalRow] = - new Iterator[InternalRow] with Closeable with SparkAdapterSupport { - private val tableAvroSchema = new Schema.Parser().parse(tableSchema.avroSchemaStr) - private val requiredAvroSchema = new Schema.Parser().parse(requiredSchema.avroSchemaStr) - private val requiredFieldPosition = - requiredSchema.structTypeSchema - .map(f => tableAvroSchema.getField(f.name).pos()).toList - private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema) - private val deserializer = sparkAdapter.createAvroDeserializer(requiredAvroSchema, requiredSchema.structTypeSchema) - private val unsafeProjection = UnsafeProjection.create(requiredSchema.structTypeSchema) - private 
var logScanner = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config) - private val logRecords = logScanner.getRecords - private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala - - private var recordToLoad: InternalRow = _ - - override def hasNext: Boolean = { - if (logRecordsKeyIterator.hasNext) { - val curAvrokey = logRecordsKeyIterator.next() - val curAvroRecord = logRecords.get(curAvrokey).getData.getInsertValue(tableAvroSchema, payloadProps) - if (!curAvroRecord.isPresent) { - // delete record found, skipping - this.hasNext - } else { - val requiredAvroRecord = AvroConversionUtils.buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, - requiredFieldPosition, recordBuilder) - val rowOpt = deserializer.deserialize(requiredAvroRecord) - recordToLoad = unsafeProjection(rowOpt.get.asInstanceOf[InternalRow]) - true + /** + * Provided w/ instance of [[HoodieMergeOnReadFileSplit]], iterates over all of the records stored in + * Delta Log files (represented as [[InternalRow]]s) + */ + private class LogFileIterator(split: HoodieMergeOnReadFileSplit, + config: Configuration) + extends Iterator[InternalRow] with Closeable with AvroDeserializerSupport { + + protected override val requiredAvroSchema: Schema = new Schema.Parser().parse(requiredSchema.avroSchemaStr) + protected override val requiredStructTypeSchema: StructType = requiredSchema.structTypeSchema + + protected val logFileReaderAvroSchema: Schema = new Schema.Parser().parse(tableSchema.avroSchemaStr) + + protected val recordBuilder: GenericRecordBuilder = new GenericRecordBuilder(requiredAvroSchema) + protected var recordToLoad: InternalRow = _ + + // TODO validate whether we need to do UnsafeProjection + protected val unsafeProjection: UnsafeProjection = UnsafeProjection.create(requiredStructTypeSchema) + + // NOTE: This maps _required_ schema fields onto the _full_ table schema, collecting their "ordinals" + // w/in the record payload. 
This is required, to project records read from the Delta Log file + // which always reads records in full schema (never projected, due to the fact that DL file might + // be stored in non-columnar formats like Avro, HFile, etc) + private val requiredSchemaFieldOrdinals: List[Int] = collectFieldOrdinals(requiredAvroSchema, logFileReaderAvroSchema) + + // TODO: now logScanner with internalSchema support column project, we may no need projectAvroUnsafe + private var logScanner = + HoodieMergeOnReadRDD.scanLog(split.logFiles, getPartitionPath(split), logFileReaderAvroSchema, tableState, + maxCompactionMemoryInBytes, config, tableSchema.internalSchema) + + private val logRecords = logScanner.getRecords.asScala + + // NOTE: This iterator iterates over already projected (in required schema) records + // NOTE: This have to stay lazy to make sure it's initialized only at the point where it's + // going to be used, since we modify `logRecords` before that and therefore can't do it any earlier + protected lazy val logRecordsIterator: Iterator[Option[GenericRecord]] = + logRecords.iterator.map { + case (_, record) => + val avroRecordOpt = toScalaOption(record.getData.getInsertValue(logFileReaderAvroSchema, payloadProps)) + avroRecordOpt.map { + avroRecord => projectAvroUnsafe(avroRecord, requiredAvroSchema, requiredSchemaFieldOrdinals, recordBuilder) } - } else { - false - } } - override def next(): InternalRow = { - recordToLoad - } + protected def removeLogRecord(key: String): Option[HoodieRecord[_ <: HoodieRecordPayload[_]]] = + logRecords.remove(key) - override def close(): Unit = { - if (logScanner != null) { - try { - logScanner.close() - } finally { - logScanner = null - } - } - } - } + override def hasNext: Boolean = hasNextInternal - private def skipMergeFileIterator(split: HoodieMergeOnReadFileSplit, - baseFileIterator: Iterator[InternalRow], - config: Configuration): Iterator[InternalRow] = - new Iterator[InternalRow] with Closeable with SparkAdapterSupport { - 
private val tableAvroSchema = new Schema.Parser().parse(tableSchema.avroSchemaStr) - private val requiredAvroSchema = new Schema.Parser().parse(requiredSchema.avroSchemaStr) - private val requiredFieldPosition = - requiredSchema.structTypeSchema - .map(f => tableAvroSchema.getField(f.name).pos()).toList - private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema) - private val deserializer = sparkAdapter.createAvroDeserializer(requiredAvroSchema, requiredSchema.structTypeSchema) - private val unsafeProjection = UnsafeProjection.create(requiredSchema.structTypeSchema) - private var logScanner = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config) - private val logRecords = logScanner.getRecords - private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala - - private var recordToLoad: InternalRow = _ - - @scala.annotation.tailrec - override def hasNext: Boolean = { - if (baseFileIterator.hasNext) { - val curRow = baseFileIterator.next() - recordToLoad = unsafeProjection(curRow) - true + // NOTE: It's crucial for this method to be annotated w/ [[@tailrec]] to make sure + // that recursion is unfolded into a loop to avoid stack overflows while + // handling records + @tailrec private def hasNextInternal: Boolean = { + logRecordsIterator.hasNext && { + val avroRecordOpt = logRecordsIterator.next() + if (avroRecordOpt.isEmpty) { + // Record has been deleted, skipping + this.hasNextInternal } else { - if (logRecordsKeyIterator.hasNext) { - val curAvrokey = logRecordsKeyIterator.next() - val curAvroRecord = logRecords.get(curAvrokey).getData.getInsertValue(tableAvroSchema, payloadProps) - if (!curAvroRecord.isPresent) { - // delete record found, skipping - this.hasNext - } else { - val requiredAvroRecord = AvroConversionUtils.buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, - requiredFieldPosition, recordBuilder) - val rowOpt = deserializer.deserialize(requiredAvroRecord) - recordToLoad = 
unsafeProjection(rowOpt.get.asInstanceOf[InternalRow]) - true - } - } else { - false - } + recordToLoad = unsafeProjection(deserialize(avroRecordOpt.get)) + true } } + } - override def next(): InternalRow = { - recordToLoad - } + override final def next(): InternalRow = recordToLoad - override def close(): Unit = { - if (logScanner != null) { - try { - logScanner.close() - } finally { - logScanner = null - } + override def close(): Unit = + if (logScanner != null) { + try { + logScanner.close() + } finally { + logScanner = null } } + } + + /** + * Provided w/ instance of [[HoodieMergeOnReadFileSplit]], provides an iterator over all of the records stored in + * Base file as well as all of the Delta Log files simply returning concatenation of these streams, while not + * performing any combination/merging of the records w/ the same primary keys (ie producing duplicates potentially) + */ + private class SkipMergeIterator(split: HoodieMergeOnReadFileSplit, + baseFileIterator: Iterator[InternalRow], + config: Configuration) + extends LogFileIterator(split, config) { + + override def hasNext: Boolean = { + if (baseFileIterator.hasNext) { + val curRow = baseFileIterator.next() + recordToLoad = unsafeProjection(curRow) + true + } else { + super[LogFileIterator].hasNext + } } + } - private def payloadCombineFileIterator(split: HoodieMergeOnReadFileSplit, - baseFileIterator: Iterator[InternalRow], - config: Configuration): Iterator[InternalRow] = - new Iterator[InternalRow] with Closeable with SparkAdapterSupport { - private val tableAvroSchema = new Schema.Parser().parse(tableSchema.avroSchemaStr) - private val requiredAvroSchema = new Schema.Parser().parse(requiredSchema.avroSchemaStr) - private val requiredFieldPosition = - requiredSchema.structTypeSchema - .map(f => tableAvroSchema.getField(f.name).pos()).toList - private val serializer = sparkAdapter.createAvroSerializer(tableSchema.structTypeSchema, tableAvroSchema, - resolveAvroSchemaNullability(tableAvroSchema)) - 
private val requiredDeserializer = sparkAdapter.createAvroDeserializer(requiredAvroSchema, requiredSchema.structTypeSchema) - private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema) - private val unsafeProjection = UnsafeProjection.create(requiredSchema.structTypeSchema) - private var logScanner = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config) - private val logRecords = logScanner.getRecords - private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala - private val keyToSkip = mutable.Set.empty[String] - private val recordKeyPosition = tableSchema.structTypeSchema.fieldIndex(recordKeyField) - - private var recordToLoad: InternalRow = _ - - @scala.annotation.tailrec - override def hasNext: Boolean = { - if (baseFileIterator.hasNext) { - val curRow = baseFileIterator.next() - val curKey = curRow.getString(recordKeyPosition) - if (logRecords.containsKey(curKey)) { - // duplicate key found, merging - keyToSkip.add(curKey) - val mergedAvroRecord = mergeRowWithLog(curRow, curKey) - if (!mergedAvroRecord.isPresent) { - // deleted - this.hasNext - } else { - // load merged record as InternalRow with required schema - val requiredAvroRecord = AvroConversionUtils.buildAvroRecordBySchema(mergedAvroRecord.get(), requiredAvroSchema, - requiredFieldPosition, recordBuilder) - val rowOpt = requiredDeserializer.deserialize(requiredAvroRecord) - recordToLoad = unsafeProjection(rowOpt.get.asInstanceOf[InternalRow]) - true - } - } else { - // No merge needed, load current row with required schema - recordToLoad = unsafeProjection(createInternalRowWithSchema(curRow, requiredSchema.structTypeSchema, requiredFieldPosition)) - true - } + /** + * Provided w/ instance of [[HoodieMergeOnReadFileSplit]], provides an iterator over all of the records stored in + * a) Base file and all of the b) Delta Log files combining records with the same primary key from both of these + * streams + */ + private class RecordMergingFileIterator(split: 
HoodieMergeOnReadFileSplit, + baseFileIterator: Iterator[InternalRow], + baseFileReaderSchema: HoodieTableSchema, + config: Configuration) + extends LogFileIterator(split, config) { + + // NOTE: Record-merging iterator supports 2 modes of operation merging records bearing either + // - Full table's schema + // - Projected schema + // As such, no particular schema could be assumed, and therefore we rely on the caller + // to correspondingly set the scheme of the expected output of base-file reader + private val baseFileReaderAvroSchema = new Schema.Parser().parse(baseFileReaderSchema.avroSchemaStr) + private val requiredSchemaFieldOrdinals: List[Int] = collectFieldOrdinals(requiredAvroSchema, baseFileReaderAvroSchema) + + private val serializer = sparkAdapter.createAvroSerializer(baseFileReaderSchema.structTypeSchema, + baseFileReaderAvroSchema, resolveAvroSchemaNullability(baseFileReaderAvroSchema)) + + private val recordKeyOrdinal = baseFileReaderSchema.structTypeSchema.fieldIndex(tableState.recordKeyField) + + override def hasNext: Boolean = hasNextInternal + + // NOTE: It's crucial for this method to be annotated w/ [[@tailrec]] to make sure + // that recursion is unfolded into a loop to avoid stack overflows while + // handling records + @tailrec private def hasNextInternal: Boolean = { + if (baseFileIterator.hasNext) { + val curRowRecord = baseFileIterator.next() + val curKey = curRowRecord.getString(recordKeyOrdinal) + val updatedRecordOpt = removeLogRecord(curKey) + if (updatedRecordOpt.isEmpty) { + // No merge needed, load current row with required projected schema + recordToLoad = unsafeProjection(projectRowUnsafe(curRowRecord, requiredSchema.structTypeSchema, requiredSchemaFieldOrdinals)) + true } else { - if (logRecordsKeyIterator.hasNext) { - val curKey = logRecordsKeyIterator.next() - if (keyToSkip.contains(curKey)) { - this.hasNext - } else { - val insertAvroRecord = logRecords.get(curKey).getData.getInsertValue(tableAvroSchema, payloadProps) - if 
(!insertAvroRecord.isPresent) { - // stand alone delete record, skipping - this.hasNext - } else { - val requiredAvroRecord = AvroConversionUtils - .buildAvroRecordBySchema( - insertAvroRecord.get(), - requiredAvroSchema, - requiredFieldPosition, - recordBuilder - ) - val rowOpt = requiredDeserializer.deserialize(requiredAvroRecord) - recordToLoad = unsafeProjection(rowOpt.get.asInstanceOf[InternalRow]) - true - } - } + val mergedAvroRecordOpt = merge(serialize(curRowRecord), updatedRecordOpt.get) + if (mergedAvroRecordOpt.isEmpty) { + // Record has been deleted, skipping + this.hasNextInternal } else { - false + // NOTE: In occurrence of a merge we can't know the schema of the record being returned, b/c + // record from the Delta Log will bear (full) Table schema, while record from the Base file + // might already be read in projected one (as an optimization). + // As such we can't use more performant [[projectAvroUnsafe]], and instead have to fallback + // to [[projectAvro]] + val projectedAvroRecord = projectAvro(mergedAvroRecordOpt.get, requiredAvroSchema, recordBuilder) + recordToLoad = unsafeProjection(deserialize(projectedAvroRecord)) + true } } + } else { + super[LogFileIterator].hasNext } + } - override def next(): InternalRow = recordToLoad - - override def close(): Unit = { - if (logScanner != null) { - try { - logScanner.close() - } finally { - logScanner = null - } - } - } + private def serialize(curRowRecord: InternalRow): GenericRecord = + serializer.serialize(curRowRecord).asInstanceOf[GenericRecord] - private def mergeRowWithLog(curRow: InternalRow, curKey: String) : org.apache.hudi.common.util.Option[IndexedRecord] = { - val historyAvroRecord = serializer.serialize(curRow).asInstanceOf[GenericRecord] - val mergedRec = logRecords.get(curKey).getData - .combineAndGetUpdateValue(historyAvroRecord, tableAvroSchema, payloadProps) - if (mergedRec.isPresent && mergedRec.get().getSchema != tableAvroSchema) { - 
org.apache.hudi.common.util.Option.of(HoodieAvroUtils.rewriteRecord(mergedRec.get().asInstanceOf[GenericRecord], tableAvroSchema).asInstanceOf[IndexedRecord]) - } else { - mergedRec - } - } + private def merge(curAvroRecord: GenericRecord, newRecord: HoodieRecord[_ <: HoodieRecordPayload[_]]): Option[IndexedRecord] = { + // NOTE: We have to pass in Avro Schema used to read from Delta Log file since we invoke combining API + // on the record from the Delta Log + toScalaOption(newRecord.getData.combineAndGetUpdateValue(curAvroRecord, logFileReaderAvroSchema, payloadProps)) } + } } private object HoodieMergeOnReadRDD { - val CONFIG_INSTANTIATION_LOCK = new Object() - def scanLog(split: HoodieMergeOnReadFileSplit, logSchema: Schema, config: Configuration): HoodieMergedLogRecordScanner = { - val fs = FSUtils.getFs(split.tablePath, config) - val logFiles = split.logFiles.get + val CONFIG_INSTANTIATION_LOCK = new Object() - if (HoodieTableMetadata.isMetadataTable(split.tablePath)) { - val metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).build() - val dataTableBasePath = getDataTableBasePathFromMetadataTable(split.tablePath) + def scanLog(logFiles: List[HoodieLogFile], + partitionPath: Path, + logSchema: Schema, + tableState: HoodieTableState, + maxCompactionMemoryInBytes: Long, + hadoopConf: Configuration, internalSchema: InternalSchema = InternalSchema.getEmptyInternalSchema): HoodieMergedLogRecordScanner = { + val tablePath = tableState.tablePath + val fs = FSUtils.getFs(tablePath, hadoopConf) + + if (HoodieTableMetadata.isMetadataTable(tablePath)) { + val metadataConfig = tableState.metadataConfig + val dataTableBasePath = getDataTableBasePathFromMetadataTable(tablePath) val metadataTable = new HoodieBackedTableMetadata( - new HoodieLocalEngineContext(config), metadataConfig, + new HoodieLocalEngineContext(hadoopConf), metadataConfig, dataTableBasePath, - config.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, 
HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH)) + hadoopConf.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH)) + + // We have to force full-scan for the MT log record reader, to make sure + // we can iterate over all of the partitions, since by default some of the partitions (Column Stats, + // Bloom Filter) are in "point-lookup" mode + val forceFullScan = true // NOTE: In case of Metadata Table partition path equates to partition name (since there's just one level // of indirection among MT partitions) - val relativePartitionPath = getRelativePartitionPath(new Path(split.tablePath), getPartitionPath(split)) - metadataTable.getLogRecordScanner(logFiles.asJava, relativePartitionPath).getLeft + val relativePartitionPath = getRelativePartitionPath(new Path(tablePath), partitionPath) + metadataTable.getLogRecordScanner(logFiles.asJava, relativePartitionPath, toJavaOption(Some(forceFullScan))) + .getLeft } else { val logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder() .withFileSystem(fs) - .withBasePath(split.tablePath) - .withLogFilePaths(split.logFiles.get.map(logFile => getFilePath(logFile.getPath)).asJava) + .withBasePath(tablePath) + .withLogFilePaths(logFiles.map(logFile => getFilePath(logFile.getPath)).asJava) .withReaderSchema(logSchema) - .withLatestInstantTime(split.latestCommit) + .withLatestInstantTime(tableState.latestCommitTimestamp) .withReadBlocksLazily( - Try(config.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, + Try(hadoopConf.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, HoodieRealtimeConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED).toBoolean) .getOrElse(false)) .withReverseReader(false) + .withInternalSchema(internalSchema) .withBufferSize( - config.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, + hadoopConf.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, 
HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE)) - .withMaxMemorySizeInBytes(split.maxCompactionMemoryInBytes) + .withMaxMemorySizeInBytes(maxCompactionMemoryInBytes) .withSpillableMapBasePath( - config.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, + hadoopConf.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH)) if (logFiles.nonEmpty) { - logRecordScannerBuilder.withPartition(getRelativePartitionPath(new Path(split.tablePath), logFiles.head.getPath.getParent)) + logRecordScannerBuilder.withPartition( + getRelativePartitionPath(new Path(tableState.tablePath), logFiles.head.getPath.getParent)) } logRecordScannerBuilder.build() } } + /** + * Projects provided instance of [[InternalRow]] into provided schema, assuming that the + * the schema of the original row is strictly a superset of the given one + */ + private def projectRowUnsafe(row: InternalRow, + projectedSchema: StructType, + ordinals: Seq[Int]): InternalRow = { + val projectedRow = new SpecificInternalRow(projectedSchema) + var curIndex = 0 + projectedSchema.zip(ordinals).foreach { case (field, pos) => + val curField = if (row.isNullAt(pos)) { + null + } else { + row.get(pos, field.dataType) + } + projectedRow.update(curIndex, curField) + curIndex += 1 + } + projectedRow + } + + /** + * Projects provided instance of [[IndexedRecord]] into provided schema, assuming that the + * the schema of the original row is strictly a superset of the given one + */ + def projectAvroUnsafe(record: IndexedRecord, + projectedSchema: Schema, + ordinals: List[Int], + recordBuilder: GenericRecordBuilder): GenericRecord = { + val fields = projectedSchema.getFields.asScala + checkState(fields.length == ordinals.length) + fields.zip(ordinals).foreach { + case (field, pos) => recordBuilder.set(field, record.get(pos)) + } + recordBuilder.build() + } + + /** + * Projects provided instance of [[IndexedRecord]] into provided schema, assuming that the + * the 
schema of the original row is strictly a superset of the given one + * + * This is a "safe" counterpart of [[projectAvroUnsafe]]: it does build mapping of the record's + * schema into projected one itself (instead of expecting such mapping from the caller) + */ + def projectAvro(record: IndexedRecord, + projectedSchema: Schema, + recordBuilder: GenericRecordBuilder): GenericRecord = { + projectAvroUnsafe(record, projectedSchema, collectFieldOrdinals(projectedSchema, record.getSchema), recordBuilder) + } + + /** + * Maps [[projected]] [[Schema]] onto [[source]] one, collecting corresponding field ordinals w/in it, which + * will be subsequently used by either [[projectRowUnsafe]] or [[projectAvroUnsafe()]] method + * + * @param projected target projected schema (which is a proper subset of [[source]] [[Schema]]) + * @param source source schema of the record being projected + * @return list of ordinals of corresponding fields of [[projected]] schema w/in [[source]] one + */ + private def collectFieldOrdinals(projected: Schema, source: Schema): List[Int] = { + projected.getFields.asScala.map(f => source.getField(f.name()).pos()).toList + } + private def getPartitionPath(split: HoodieMergeOnReadFileSplit): Path = { // Determine partition path as an immediate parent folder of either // - The base file // - Some log file split.dataFile.map(baseFile => new Path(baseFile.filePath)) - .getOrElse(split.logFiles.get.head.getPath) + .getOrElse(split.logFiles.head.getPath) .getParent } @@ -380,4 +447,17 @@ private object HoodieMergeOnReadRDD { case (nullable, _) => nullable } } + + trait AvroDeserializerSupport extends SparkAdapterSupport { + protected val requiredAvroSchema: Schema + protected val requiredStructTypeSchema: StructType + + private lazy val deserializer: HoodieAvroDeserializer = + sparkAdapter.createAvroDeserializer(requiredAvroSchema, requiredStructTypeSchema) + + protected def deserialize(avroRecord: GenericRecord): InternalRow = { + 
checkState(avroRecord.getSchema.getFields.size() == requiredStructTypeSchema.fields.length) + deserializer.deserialize(avroRecord).get.asInstanceOf[InternalRow] + } + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala index fc83cebc945d4..c86b1615ba58d 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala @@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hive.conf.HiveConf import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.HoodieConversionUtils.toProperties import org.apache.hudi.HoodieWriterUtils._ import org.apache.hudi.avro.HoodieAvroUtils import org.apache.hudi.client.{HoodieWriteResult, SparkRDDWriteClient} @@ -39,6 +40,8 @@ import org.apache.hudi.execution.bulkinsert.{BulkInsertInternalPartitionerWithRo import org.apache.hudi.hive.{HiveSyncConfig, HiveSyncTool} import org.apache.hudi.index.SparkHoodieIndexFactory import org.apache.hudi.internal.DataSourceInternalWriterHelper +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.utils.{AvroSchemaEvolutionUtils, SerDeHelper} import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory import org.apache.hudi.keygen.{TimestampBasedAvroKeyGenerator, TimestampBasedKeyGenerator} import org.apache.hudi.sync.common.HoodieSyncConfig @@ -87,6 +90,8 @@ object HoodieSparkSqlWriter { val originKeyGeneratorClassName = HoodieWriterUtils.getOriginKeyGenerator(parameters) val timestampKeyGeneratorConfigs = extractConfigsRelatedToTimestmapBasedKeyGenerator( originKeyGeneratorClassName, parameters) + //validate datasource and tableconfig keygen are the same + 
validateKeyGeneratorConfig(originKeyGeneratorClassName, tableConfig); val databaseName = hoodieConfig.getStringOrDefault(HoodieTableConfig.DATABASE_NAME, "") val tblName = hoodieConfig.getStringOrThrow(HoodieWriteConfig.TBL_NAME, s"'${HoodieWriteConfig.TBL_NAME.key}' must be set.").trim @@ -136,6 +141,7 @@ object HoodieSparkSqlWriter { val archiveLogFolder = hoodieConfig.getStringOrDefault(HoodieTableConfig.ARCHIVELOG_FOLDER) val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD) val populateMetaFields = hoodieConfig.getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS) + val useBaseFormatMetaFile = hoodieConfig.getBooleanOrDefault(HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT); val tableMetaClient = HoodieTableMetaClient.withPropertyBuilder() .setTableType(tableType) @@ -153,6 +159,8 @@ object HoodieSparkSqlWriter { .set(timestampKeyGeneratorConfigs) .setHiveStylePartitioningEnable(hoodieConfig.getBoolean(HIVE_STYLE_PARTITIONING)) .setUrlEncodePartitioning(hoodieConfig.getBoolean(URL_ENCODE_PARTITIONING)) + .setPartitionMetafileUseBaseFormat(useBaseFormatMetaFile) + .setDropPartitionColumnsWhenWrite(hoodieConfig.getBooleanOrDefault(HoodieTableConfig.DROP_PARTITION_COLUMNS)) .setCommitTimezone(HoodieTimelineTimeZone.valueOf(hoodieConfig.getStringOrDefault(HoodieTableConfig.TIMELINE_TIMEZONE))) .initTable(sparkContext.hadoopConfiguration, path) tableConfig = tableMetaClient.getTableConfig @@ -184,9 +192,10 @@ object HoodieSparkSqlWriter { } // Create a HoodieWriteClient & issue the delete. 
+ val internalSchemaOpt = getLatestTableInternalSchema(fs, basePath, sparkContext) val client = hoodieWriteClient.getOrElse(DataSourceUtils.createHoodieClient(jsc, null, path, tblName, - mapAsJavaMap(parameters - HoodieWriteConfig.AUTO_COMMIT_ENABLE.key))) + mapAsJavaMap(addSchemaEvolutionParameters(parameters, internalSchemaOpt) - HoodieWriteConfig.AUTO_COMMIT_ENABLE.key))) .asInstanceOf[SparkRDDWriteClient[HoodieRecordPayload[Nothing]]] if (isAsyncCompactionEnabled(client, tableConfig, parameters, jsc.hadoopConfiguration())) { @@ -231,8 +240,17 @@ object HoodieSparkSqlWriter { Array(classOf[org.apache.avro.generic.GenericData], classOf[org.apache.avro.Schema])) var schema = AvroConversionUtils.convertStructTypeToAvroSchema(df.schema, structName, nameSpace) + val lastestSchema = getLatestTableSchema(fs, basePath, sparkContext, schema) + val internalSchemaOpt = getLatestTableInternalSchema(fs, basePath, sparkContext) if (reconcileSchema) { - schema = getLatestTableSchema(fs, basePath, sparkContext, schema) + schema = lastestSchema + } + if (internalSchemaOpt.isDefined) { + schema = { + val newSparkSchema = AvroConversionUtils.convertAvroSchemaToStructType(AvroSchemaEvolutionUtils.canonicalizeColumnNullability(schema, lastestSchema)) + AvroConversionUtils.convertStructTypeToAvroSchema(newSparkSchema, structName, nameSpace) + + } } validateSchemaForHoodieIsDeleted(schema) sparkContext.getConf.registerAvroSchemas(schema) @@ -264,8 +282,9 @@ object HoodieSparkSqlWriter { val writeSchema = if (dropPartitionColumns) generateSchemaWithoutPartitionColumns(partitionColumns, schema) else schema // Create a HoodieWriteClient & issue the write. 
+ val client = hoodieWriteClient.getOrElse(DataSourceUtils.createHoodieClient(jsc, writeSchema.toString, path, - tblName, mapAsJavaMap(parameters - HoodieWriteConfig.AUTO_COMMIT_ENABLE.key) + tblName, mapAsJavaMap(addSchemaEvolutionParameters(parameters, internalSchemaOpt) - HoodieWriteConfig.AUTO_COMMIT_ENABLE.key) )).asInstanceOf[SparkRDDWriteClient[HoodieRecordPayload[Nothing]]] if (isAsyncCompactionEnabled(client, tableConfig, parameters, jsc.hadoopConfiguration())) { @@ -315,6 +334,36 @@ object HoodieSparkSqlWriter { processedRecord } + def addSchemaEvolutionParameters(parameters: Map[String, String], internalSchemaOpt: Option[InternalSchema]): Map[String, String] = { + val schemaEvolutionEnable = if (internalSchemaOpt.isDefined) "true" else "false" + parameters ++ Map(HoodieWriteConfig.INTERNAL_SCHEMA_STRING.key() -> SerDeHelper.toJson(internalSchemaOpt.getOrElse(null)), + HoodieWriteConfig.SCHEMA_EVOLUTION_ENABLE.key() -> schemaEvolutionEnable) + } + + /** + * get latest internalSchema from table + * + * @param fs instance of FileSystem. + * @param basePath base path. + * @param sparkContext instance of spark context. + * @param schema incoming record's schema. + * @return Pair of(boolean, table schema), where first entry will be true only if schema conversion is required. 
+ */ + def getLatestTableInternalSchema(fs: FileSystem, basePath: Path, sparkContext: SparkContext): Option[InternalSchema] = { + try { + if (FSUtils.isTableExists(basePath.toString, fs)) { + val tableMetaClient = HoodieTableMetaClient.builder.setConf(sparkContext.hadoopConfiguration).setBasePath(basePath.toString).build() + val tableSchemaResolver = new TableSchemaResolver(tableMetaClient) + val internalSchemaOpt = tableSchemaResolver.getTableInternalSchemaFromCommitMetadata + if (internalSchemaOpt.isPresent) Some(internalSchemaOpt.get()) else None + } else { + None + } + } catch { + case _: Exception => None + } + } + /** * Checks if schema needs upgrade (if incoming record's write schema is old while table schema got evolved). * @@ -391,9 +440,15 @@ object HoodieSparkSqlWriter { val partitionColumns = HoodieWriterUtils.getPartitionColumns(parameters) val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD) val keyGenProp = hoodieConfig.getString(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME) - val populateMetaFields = java.lang.Boolean.parseBoolean((parameters.getOrElse(HoodieTableConfig.POPULATE_META_FIELDS.key(), - String.valueOf(HoodieTableConfig.POPULATE_META_FIELDS.defaultValue())))) + val populateMetaFields = java.lang.Boolean.parseBoolean(parameters.getOrElse( + HoodieTableConfig.POPULATE_META_FIELDS.key(), + String.valueOf(HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()) + )) val baseFileFormat = hoodieConfig.getStringOrDefault(HoodieTableConfig.BASE_FILE_FORMAT) + val useBaseFormatMetaFile = java.lang.Boolean.parseBoolean(parameters.getOrElse( + HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.key(), + String.valueOf(HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.defaultValue()) + )) HoodieTableMetaClient.withPropertyBuilder() .setTableType(HoodieTableType.valueOf(tableType)) @@ -411,6 +466,7 @@ object HoodieSparkSqlWriter { .setHiveStylePartitioningEnable(hoodieConfig.getBoolean(HIVE_STYLE_PARTITIONING)) 
.setUrlEncodePartitioning(hoodieConfig.getBoolean(URL_ENCODE_PARTITIONING)) .setCommitTimezone(HoodieTimelineTimeZone.valueOf(hoodieConfig.getStringOrDefault(HoodieTableConfig.TIMELINE_TIMEZONE))) + .setPartitionMetafileUseBaseFormat(useBaseFormatMetaFile) .initTable(sparkContext.hadoopConfiguration, path) } @@ -446,8 +502,8 @@ object HoodieSparkSqlWriter { val sparkContext = sqlContext.sparkContext val populateMetaFields = java.lang.Boolean.parseBoolean((parameters.getOrElse(HoodieTableConfig.POPULATE_META_FIELDS.key(), String.valueOf(HoodieTableConfig.POPULATE_META_FIELDS.defaultValue())))) - val dropPartitionColumns = - parameters.getOrElse(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key(), DataSourceWriteOptions.DROP_PARTITION_COLUMNS.defaultValue()).toBoolean + val dropPartitionColumns = parameters.get(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key()).map(_.toBoolean) + .getOrElse(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.defaultValue()) // register classes & schemas val (structName, nameSpace) = AvroConversionUtils.getAvroRecordNameAndNamespace(tblName) sparkContext.getConf.registerKryoClasses( @@ -512,12 +568,6 @@ object HoodieSparkSqlWriter { (syncHiveSuccess, common.util.Option.ofNullable(instantTime)) } - def toProperties(params: Map[String, String]): TypedProperties = { - val props = new TypedProperties() - params.foreach(kv => props.setProperty(kv._1, kv._2)) - props - } - private def handleSaveModes(spark: SparkSession, mode: SaveMode, tablePath: Path, tableConfig: HoodieTableConfig, tableName: String, operation: WriteOperationType, fs: FileSystem): Unit = { if (mode == SaveMode.Append && tableExists) { @@ -562,6 +612,7 @@ object HoodieSparkSqlWriter { if (metaSyncEnabled) { val fs = basePath.getFileSystem(spark.sessionState.newHadoopConf()) + val baseFileFormat = hoodieConfig.getStringOrDefault(HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT); val properties = new TypedProperties() properties.putAll(hoodieConfig.getProps) 
properties.put(HiveSyncConfig.HIVE_SYNC_SCHEMA_STRING_LENGTH_THRESHOLD.key, spark.sessionState.conf.getConf(StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD).toString) @@ -572,7 +623,7 @@ object HoodieSparkSqlWriter { hiveConf.addResource(fs.getConf) syncClientToolClassSet.foreach(impl => { - SyncUtilHelpers.runHoodieMetaSync(impl.trim, properties, hiveConf, fs, basePath.toString, HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT.defaultValue) + SyncUtilHelpers.runHoodieMetaSync(impl.trim, properties, hiveConf, fs, basePath.toString, baseFileFormat) }) } true @@ -592,7 +643,7 @@ object HoodieSparkSqlWriter { jsc: JavaSparkContext, tableInstantInfo: TableInstantInfo ): (Boolean, common.util.Option[java.lang.String], common.util.Option[java.lang.String]) = { - if (writeResult.getWriteStatuses.rdd.filter(ws => ws.hasErrors).isEmpty()) { + if (writeResult.getWriteStatuses.rdd.filter(ws => ws.hasErrors).count() == 0) { log.info("Proceeding to commit the write.") val metaMap = parameters.filter(kv => kv._1.startsWith(parameters(COMMIT_METADATA_KEYPREFIX.key))) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala index 8d8ebfa7e54f1..2befb47e5e02b 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala @@ -81,7 +81,9 @@ class HoodieStreamingSink(sqlContext: SQLContext, // Override to use direct markers. In Structured streaming, timeline server is closed after // first micro-batch and subsequent micro-batches do not have timeline server running. // Thus, we can't use timeline-server-based markers. 
- val updatedOptions = options.updated(HoodieWriteConfig.MARKERS_TYPE.key(), MarkerType.DIRECT.name()) + var updatedOptions = options.updated(HoodieWriteConfig.MARKERS_TYPE.key(), MarkerType.DIRECT.name()) + // we need auto adjustment enabled for streaming sink since async table services are feasible within the same JVM. + updatedOptions = updatedOptions.updated(HoodieWriteConfig.AUTO_ADJUST_LOCK_CONFIGS.key, "true") retry(retryCnt, retryIntervalMs)( Try( @@ -205,7 +207,8 @@ class HoodieStreamingSink(sqlContext: SQLContext, protected def triggerAsyncClustering(client: SparkRDDWriteClient[HoodieRecordPayload[Nothing]]): Unit = { if (null == asyncClusteringService) { log.info("Triggering async clustering!") - asyncClusteringService = new SparkStreamingAsyncClusteringService(client) + asyncClusteringService = new SparkStreamingAsyncClusteringService(new HoodieSparkEngineContext(new JavaSparkContext(sqlContext.sparkContext)), + client) asyncClusteringService.start(new Function[java.lang.Boolean, java.lang.Boolean] { override def apply(errored: lang.Boolean): lang.Boolean = { log.info(s"Async clustering service shutdown. Errored ? 
$errored") diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieUnsafeRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieUnsafeRDD.scala index 3f95746a54669..51b03a0024efc 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieUnsafeRDD.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieUnsafeRDD.scala @@ -56,12 +56,8 @@ import org.apache.spark.{Partition, SparkContext, TaskContext} * NOTE: It enforces, for ex, that all of the RDDs implement [[compute]] method returning * [[InternalRow]] to avoid superfluous ser/de */ -abstract class HoodieUnsafeRDD(@transient sc: SparkContext) - extends RDD[InternalRow](sc, Nil) { - - def compute(split: Partition, context: TaskContext): Iterator[InternalRow] - - override final def collect(): Array[InternalRow] = +trait HoodieUnsafeRDD extends RDD[InternalRow] { + override def collect(): Array[InternalRow] = throw new UnsupportedOperationException( "This method will not function correctly, please refer to scala-doc for HoodieUnsafeRDD" ) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala index a4d76763fc558..60428415861be 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala @@ -18,13 +18,16 @@ package org.apache.hudi import java.util.Properties + import org.apache.hudi.DataSourceOptionsHelper.allAlternatives import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE import org.apache.hudi.common.config.{DFSPropertiesConfiguration, HoodieConfig, TypedProperties} import org.apache.hudi.common.table.HoodieTableConfig +import 
org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.exception.HoodieException import org.apache.hudi.hive.HiveSyncConfig +import org.apache.hudi.keygen.{NonpartitionedKeyGenerator, SimpleKeyGenerator} import org.apache.hudi.sync.common.HoodieSyncConfig import org.apache.spark.sql.SparkSession import org.apache.spark.sql.hudi.command.SqlKeyGenerator @@ -90,12 +93,6 @@ object HoodieWriterUtils { Map() ++ hoodieConfig.getProps.asScala ++ globalProps ++ DataSourceOptionsHelper.translateConfigurations(parameters) } - def toProperties(params: Map[String, String]): TypedProperties = { - val props = new TypedProperties() - params.foreach(kv => props.setProperty(kv._1, kv._2)) - props - } - /** * Get the partition columns to stored to hoodie.properties. * @param parameters @@ -159,6 +156,36 @@ object HoodieWriterUtils { } } + if (diffConfigs.nonEmpty) { + diffConfigs.insert(0, "\nConfig conflict(key\tcurrent value\texisting value):\n") + throw new HoodieException(diffConfigs.toString.trim) + } + // Check schema evolution for bootstrap table. + // now we do not support bootstrap table. 
+ if (params.get(OPERATION.key).contains(BOOTSTRAP_OPERATION_OPT_VAL) + && params.getOrElse(HoodieWriteConfig.SCHEMA_EVOLUTION_ENABLE.key(), "false").toBoolean) { + throw new HoodieException(String + .format("now schema evolution cannot support bootstrap table, pls set %s to false", HoodieWriteConfig.SCHEMA_EVOLUTION_ENABLE.key())) + } + } + + /** + * Detects conflicts between datasourceKeyGen and existing table configuration keyGen + */ + def validateKeyGeneratorConfig(datasourceKeyGen: String, tableConfig: HoodieConfig): Unit = { + val diffConfigs = StringBuilder.newBuilder + + if (null != tableConfig) { + val tableConfigKeyGen = tableConfig.getString(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME) + if (null != tableConfigKeyGen && null != datasourceKeyGen) { + val nonPartitionedTableConfig = tableConfigKeyGen.equals(classOf[NonpartitionedKeyGenerator].getCanonicalName) + val simpleKeyDataSourceConfig = datasourceKeyGen.equals(classOf[SimpleKeyGenerator].getCanonicalName) + if (nonPartitionedTableConfig && simpleKeyDataSourceConfig) { + diffConfigs.append(s"KeyGenerator:\t$datasourceKeyGen\t$tableConfigKeyGen\n") + } + } + } + if (diffConfigs.nonEmpty) { diffConfigs.insert(0, "\nConfig conflict(key\tcurrent value\texisting value):\n") throw new HoodieException(diffConfigs.toString.trim) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala index 9247973e78fc0..039dafb596d8d 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala @@ -18,17 +18,20 @@ package org.apache.hudi import org.apache.avro.Schema -import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieRecord, HoodieReplaceCommitMetadata} +import 
org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieFileFormat, HoodieRecord, HoodieReplaceCommitMetadata} import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} - import java.util.stream.Collectors + import org.apache.hadoop.fs.{GlobPattern, Path} import org.apache.hudi.client.common.HoodieSparkEngineContext +import org.apache.hudi.client.utils.SparkInternalSchemaConverter import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline} -import org.apache.hudi.common.util.HoodieTimer +import org.apache.hudi.common.util.{HoodieTimer, InternalSchemaCache} import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.exception.HoodieException +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.utils.SerDeHelper import org.apache.hudi.table.HoodieSparkTable import org.apache.log4j.LogManager import org.apache.spark.api.java.JavaSparkContext @@ -82,10 +85,17 @@ class IncrementalRelation(val sqlContext: SQLContext, private val commitsToReturn = commitsTimelineToReturn.getInstants.iterator().toList // use schema from a file produced in the end/latest instant - val usedSchema: StructType = { + + val (usedSchema, internalSchema) = { log.info("Inferring schema..") val schemaResolver = new TableSchemaResolver(metaClient) - val tableSchema = if (useEndInstantSchema) { + val iSchema = if (useEndInstantSchema && !commitsToReturn.isEmpty) { + InternalSchemaCache.searchSchemaAndCache(commitsToReturn.last.getTimestamp.toLong, metaClient, hoodieTable.getConfig.getInternalSchemaCacheEnable) + } else { + schemaResolver.getTableInternalSchemaFromCommitMetadata.orElse(null) + } + + val tableSchema = if (useEndInstantSchema && iSchema.isEmptySchema) { if (commitsToReturn.isEmpty) schemaResolver.getTableAvroSchemaWithoutMetadataFields() else schemaResolver.getTableAvroSchemaWithoutMetadataFields(commitsToReturn.last) } else { @@ -93,10 +103,15 @@ class 
IncrementalRelation(val sqlContext: SQLContext, } if (tableSchema.getType == Schema.Type.NULL) { // if there is only one commit in the table and is an empty commit without schema, return empty RDD here - StructType(Nil) + (StructType(Nil), InternalSchema.getEmptyInternalSchema) } else { val dataSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableSchema) - StructType(skeletonSchema.fields ++ dataSchema.fields) + if (iSchema != null && !iSchema.isEmptySchema) { + // if internalSchema is ready, dataSchema will contains skeletonSchema + (dataSchema, iSchema) + } else { + (StructType(skeletonSchema.fields ++ dataSchema.fields), InternalSchema.getEmptyInternalSchema) + } } } @@ -161,6 +176,16 @@ class IncrementalRelation(val sqlContext: SQLContext, } // unset the path filter, otherwise if end_instant_time is not the latest instant, path filter set for RO view // will filter out all the files incorrectly. + // pass internalSchema to hadoopConf, so it can be used in executors. + val validCommits = metaClient + .getCommitsAndCompactionTimeline.filterCompletedInstants.getInstants.toArray().map(_.asInstanceOf[HoodieInstant].getFileName).mkString(",") + sqlContext.sparkContext.hadoopConfiguration.set(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA, SerDeHelper.toJson(internalSchema)) + sqlContext.sparkContext.hadoopConfiguration.set(SparkInternalSchemaConverter.HOODIE_TABLE_PATH, metaClient.getBasePath) + sqlContext.sparkContext.hadoopConfiguration.set(SparkInternalSchemaConverter.HOODIE_VALID_COMMITS_LIST, validCommits) + val formatClassName = metaClient.getTableConfig.getBaseFileFormat match { + case HoodieFileFormat.PARQUET => if (!internalSchema.isEmptySchema) "HoodieParquet" else "parquet" + case HoodieFileFormat.ORC => "orc" + } sqlContext.sparkContext.hadoopConfiguration.unset("mapreduce.input.pathFilter.class") val sOpts = optParams.filter(p => !p._1.equalsIgnoreCase("path")) if (filteredRegularFullPaths.isEmpty && filteredMetaBootstrapFullPaths.isEmpty) { 
@@ -216,8 +241,8 @@ class IncrementalRelation(val sqlContext: SQLContext, if (regularFileIdToFullPath.nonEmpty) { df = df.union(sqlContext.read.options(sOpts) - .schema(usedSchema) - .parquet(filteredRegularFullPaths.toList: _*) + .schema(usedSchema).format(formatClassName) + .load(filteredRegularFullPaths.toList: _*) .filter(String.format("%s >= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp)) .filter(String.format("%s <= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index 2517252d700fb..46e395fc2bfe1 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -48,6 +48,11 @@ class MergeOnReadIncrementalRelation(sqlContext: SQLContext, override type FileSplit = HoodieMergeOnReadFileSplit + override def imbueConfigs(sqlContext: SQLContext): Unit = { + super.imbueConfigs(sqlContext) + sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") + } + override protected def timeline: HoodieTimeline = { val startTimestamp = optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key) val endTimestamp = optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME.key, super.timeline.lastInstant().get.getTimestamp) @@ -75,7 +80,7 @@ class MergeOnReadIncrementalRelation(sqlContext: SQLContext, options = optParams, // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it // to configure Parquet reader appropriately - hadoopConf = new Configuration(conf) + hadoopConf = HoodieDataSourceHelper.getConfigurationWithInternalSchema(new Configuration(conf), internalSchema, 
metaClient.getBasePath, validCommits) ) val requiredSchemaParquetReader = createBaseFileReader( @@ -87,15 +92,14 @@ class MergeOnReadIncrementalRelation(sqlContext: SQLContext, options = optParams, // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it // to configure Parquet reader appropriately - hadoopConf = new Configuration(conf) + hadoopConf = HoodieDataSourceHelper.getConfigurationWithInternalSchema(new Configuration(conf), requiredSchema.internalSchema, metaClient.getBasePath, validCommits) ) - val hoodieTableState = HoodieTableState(HoodieRecord.RECORD_KEY_METADATA_FIELD, preCombineFieldOpt) - + val hoodieTableState = getTableState // TODO(HUDI-3639) implement incremental span record filtering w/in RDD to make sure returned iterator is appropriately // filtered, since file-reader might not be capable to perform filtering - new HoodieMergeOnReadRDD(sqlContext.sparkContext, jobConf, fullSchemaParquetReader, - requiredSchemaParquetReader, hoodieTableState, tableSchema, requiredSchema, fileSplits) + new HoodieMergeOnReadRDD(sqlContext.sparkContext, jobConf, fullSchemaParquetReader, requiredSchemaParquetReader, + tableSchema, requiredSchema, hoodieTableState, mergeType, fileSplits) } override protected def collectFileSplits(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): List[HoodieMergeOnReadFileSplit] = { diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala index d2515e3297d0f..d85788e25b303 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala @@ -39,11 +39,7 @@ import org.apache.spark.sql.types.StructType import scala.collection.JavaConverters._ case class 
HoodieMergeOnReadFileSplit(dataFile: Option[PartitionedFile], - logFiles: Option[List[HoodieLogFile]], - latestCommit: String, - tablePath: String, - maxCompactionMemoryInBytes: Long, - mergeType: String) extends HoodieFileSplit + logFiles: List[HoodieLogFile]) extends HoodieFileSplit class MergeOnReadSnapshotRelation(sqlContext: SQLContext, optParams: Map[String, String], @@ -54,13 +50,18 @@ class MergeOnReadSnapshotRelation(sqlContext: SQLContext, override type FileSplit = HoodieMergeOnReadFileSplit - private val mergeType = optParams.getOrElse( - DataSourceReadOptions.REALTIME_MERGE.key, + override lazy val mandatoryColumns: Seq[String] = + Seq(recordKeyField) ++ preCombineFieldOpt.map(Seq(_)).getOrElse(Seq()) + + protected val mergeType: String = optParams.getOrElse(DataSourceReadOptions.REALTIME_MERGE.key, DataSourceReadOptions.REALTIME_MERGE.defaultValue) - private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf) + override def imbueConfigs(sqlContext: SQLContext): Unit = { + super.imbueConfigs(sqlContext) + sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true") + } - protected override def composeRDD(fileIndex: Seq[HoodieMergeOnReadFileSplit], + protected override def composeRDD(fileSplits: Seq[HoodieMergeOnReadFileSplit], partitionSchema: StructType, tableSchema: HoodieTableSchema, requiredSchema: HoodieTableSchema, @@ -78,7 +79,7 @@ class MergeOnReadSnapshotRelation(sqlContext: SQLContext, options = optParams, // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it // to configure Parquet reader appropriately - hadoopConf = new Configuration(conf) + hadoopConf = HoodieDataSourceHelper.getConfigurationWithInternalSchema(new Configuration(conf), internalSchema, metaClient.getBasePath, validCommits) ) val requiredSchemaParquetReader = createBaseFileReader( @@ -90,13 +91,12 @@ class MergeOnReadSnapshotRelation(sqlContext: SQLContext, options = optParams, // NOTE: 
We have to fork the Hadoop Config here as Spark will be modifying it // to configure Parquet reader appropriately - hadoopConf = new Configuration(conf) + hadoopConf = HoodieDataSourceHelper.getConfigurationWithInternalSchema(new Configuration(conf), requiredSchema.internalSchema, metaClient.getBasePath, validCommits) ) - val tableState = HoodieTableState(recordKeyField, preCombineFieldOpt) - - new HoodieMergeOnReadRDD(sqlContext.sparkContext, jobConf, fullSchemaParquetReader, - requiredSchemaParquetReader, tableState, tableSchema, requiredSchema, fileIndex) + val tableState = getTableState + new HoodieMergeOnReadRDD(sqlContext.sparkContext, jobConf, fullSchemaParquetReader, requiredSchemaParquetReader, + tableSchema, requiredSchema, tableState, mergeType, fileSplits) } protected override def collectFileSplits(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): List[HoodieMergeOnReadFileSplit] = { @@ -123,15 +123,14 @@ class MergeOnReadSnapshotRelation(sqlContext: SQLContext, protected def buildSplits(fileSlices: Seq[FileSlice]): List[HoodieMergeOnReadFileSplit] = { fileSlices.map { fileSlice => val baseFile = toScalaOption(fileSlice.getBaseFile) - val logFiles = Option(fileSlice.getLogFiles.sorted(HoodieLogFile.getLogFileComparator).iterator().asScala.toList) + val logFiles = fileSlice.getLogFiles.sorted(HoodieLogFile.getLogFileComparator).iterator().asScala.toList val partitionedBaseFile = baseFile.map { file => val filePath = getFilePath(file.getFileStatus.getPath) - PartitionedFile(InternalRow.empty, filePath, 0, file.getFileLen) + PartitionedFile(getPartitionColumnsAsInternalRow(file.getFileStatus), filePath, 0, file.getFileLen) } - HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles, queryTimestamp.get, - metaClient.getBasePath, maxCompactionMemoryInBytes, mergeType) + HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles) }.toList } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkConfigs.scala 
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkConfigs.scala index 75dee2108914f..73e1f86948e88 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkConfigs.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkConfigs.scala @@ -27,9 +27,9 @@ object SparkConfigs { /* When async compaction is enabled (deltastreamer or streaming sink), users might be interested to set custom - scheduling configs for regular writes and async compaction. This is the property used to set custom scheduler config - file with spark. In Deltastreamer, the file is generated within hudi and set if necessary. Where as in case of streaming - sink, users have to set this property when they invoke spark shell. + scheduling configs for regular writes and async table services like compaction and clustering. This is the property + used to set custom scheduler config file with spark. In Deltastreamer, the file is generated within hudi and set if + necessary. Where as in case of streaming sink, users have to set this property when they invoke spark shell. Sample format of the file contents. 
@@ -43,6 +43,11 @@ object SparkConfigs { 3 1 + + FAIR + 2 + 1 + */ val SPARK_SCHEDULER_ALLOCATION_FILE_KEY = "spark.scheduler.allocation.file" diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala index a06ffffe50e50..1305323bd1a28 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala @@ -308,7 +308,7 @@ object SparkHoodieTableFileIndex { } private def deduceQueryType(configProperties: TypedProperties): HoodieTableQueryType = { - configProperties.asScala(QUERY_TYPE.key) match { + configProperties.asScala.getOrElse(QUERY_TYPE.key, QUERY_TYPE.defaultValue) match { case QUERY_TYPE_SNAPSHOT_OPT_VAL => HoodieTableQueryType.SNAPSHOT case QUERY_TYPE_INCREMENTAL_OPT_VAL => HoodieTableQueryType.INCREMENTAL case QUERY_TYPE_READ_OPTIMIZED_OPT_VAL => HoodieTableQueryType.READ_OPTIMIZED diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala deleted file mode 100644 index d640c02261742..0000000000000 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.spark.sql - -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute -import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression} -import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation} -import org.apache.spark.sql.types.StructType - -object HoodieCatalystExpressionUtils { - - /** - * Resolve filter expression from string expr with given table schema, for example: - *
    -   *   ts > 1000 and ts <= 1500
    -   * 
    - * will be resolved as - *
    -   *   And(GreaterThan(ts#590L > 1000), LessThanOrEqual(ts#590L <= 1500))
    -   * 
    - * - * @param spark The spark session - * @param exprString String to be resolved - * @param tableSchema The table schema - * @return Resolved filter expression - */ - def resolveFilterExpr(spark: SparkSession, exprString: String, tableSchema: StructType): Expression = { - val expr = spark.sessionState.sqlParser.parseExpression(exprString) - resolveFilterExpr(spark, expr, tableSchema) - } - - def resolveFilterExpr(spark: SparkSession, expr: Expression, tableSchema: StructType): Expression = { - val schemaFields = tableSchema.fields - val resolvedExpr = spark.sessionState.analyzer.ResolveReferences( - Filter(expr, - LocalRelation(schemaFields.head, schemaFields.drop(1): _*)) - ) - .asInstanceOf[Filter].condition - - checkForUnresolvedRefs(resolvedExpr) - } - - private def checkForUnresolvedRefs(resolvedExpr: Expression): Expression = - resolvedExpr match { - case UnresolvedAttribute(_) => throw new IllegalStateException("unresolved attribute") - case _ => resolvedExpr.mapChildren(e => checkForUnresolvedRefs(e)) - } - - /** - * Split the given predicates into two sequence predicates: - * - predicates that references partition columns only(and involves no sub-query); - * - other predicates. 
- * - * @param sparkSession The spark session - * @param predicates The predicates to be split - * @param partitionColumns The partition columns - * @return (partitionFilters, dataFilters) - */ - def splitPartitionAndDataPredicates(sparkSession: SparkSession, - predicates: Array[Expression], - partitionColumns: Array[String]): (Array[Expression], Array[Expression]) = { - // Validates that the provided names both resolve to the same entity - val resolvedNameEquals = sparkSession.sessionState.analyzer.resolver - - predicates.partition(expr => { - // Checks whether given expression only references partition columns(and involves no sub-query) - expr.references.forall(r => partitionColumns.exists(resolvedNameEquals(r.name, _))) && - !SubqueryExpression.hasSubquery(expr) - }) - } -} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieSparkTypeUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieSparkTypeUtils.scala new file mode 100644 index 0000000000000..3b0fcf0f322f1 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieSparkTypeUtils.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.types.{DataType, DecimalType, NumericType, StringType} + +// TODO unify w/ DataTypeUtils +object HoodieSparkTypeUtils { + + /** + * Returns whether this DecimalType is wider than `other`. If yes, it means `other` + * can be casted into `this` safely without losing any precision or range. + */ + def isWiderThan(one: DecimalType, another: DecimalType) = + one.isWiderThan(another) + + /** + * Checks whether casting expression of [[from]] [[DataType]] to [[to]] [[DataType]] will + * preserve ordering of the elements + */ + def isCastPreservingOrdering(from: DataType, to: DataType): Boolean = + (from, to) match { + // NOTE: In the casting rules defined by Spark, only casting from String to Numeric + // (and vice versa) are the only casts that might break the ordering of the elements after casting + case (StringType, _: NumericType) => false + case (_: NumericType, StringType) => false + + case _ => true + } +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/HoodieUnsafeRDDUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieUnsafeRDDUtils.scala similarity index 61% rename from hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/HoodieUnsafeRDDUtils.scala rename to hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieUnsafeRDDUtils.scala index 1ac8fa098119f..8995701d5fc5c 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/HoodieUnsafeRDDUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieUnsafeRDDUtils.scala @@ -1,12 +1,13 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,10 +16,12 @@ * limitations under the License. 
*/ -package org.apache.spark +package org.apache.spark.sql import org.apache.hudi.HoodieUnsafeRDD +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types.StructType import org.apache.spark.util.MutablePair /** @@ -26,6 +29,10 @@ import org.apache.spark.util.MutablePair */ object HoodieUnsafeRDDUtils { + // TODO scala-doc + def createDataFrame(spark: SparkSession, rdd: RDD[InternalRow], structType: StructType): DataFrame = + spark.internalCreateDataFrame(rdd, structType) + /** * Canonical implementation of the [[RDD#collect]] for [[HoodieUnsafeRDD]], returning a properly * copied [[Array]] of [[InternalRow]]s diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieSparkAvroSchemaConverters.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieSparkAvroSchemaConverters.scala new file mode 100644 index 0000000000000..65306ac44686b --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieSparkAvroSchemaConverters.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.sql.avro.SchemaConverters.SchemaType +import org.apache.spark.sql.types.DataType + +/** + * This interface is simply a facade abstracting away Spark's [[SchemaConverters]] implementation, allowing + * the rest of the code-base to not depend on it directly + */ +object HoodieSparkAvroSchemaConverters extends HoodieAvroSchemaConverters { + + override def toSqlType(avroSchema: Schema): (DataType, Boolean) = + SchemaConverters.toSqlType(avroSchema) match { + case SchemaType(dataType, nullable) => (dataType, nullable) + } + + override def toAvroType(catalystType: DataType, nullable: Boolean, recordName: String, nameSpace: String): Schema = + SchemaConverters.toAvroType(catalystType, nullable, recordName, nameSpace) + +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala new file mode 100644 index 0000000000000..a5b519b0e0189 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.LogicalTypes.{Date, Decimal, TimestampMicros, TimestampMillis} +import org.apache.avro.Schema.Type._ +import org.apache.avro.{LogicalTypes, Schema, SchemaBuilder} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.sql.types.Decimal.minBytesForPrecision +import org.apache.spark.sql.types._ + +import scala.collection.JavaConverters._ + +/** + * This object contains method that are used to convert sparkSQL schemas to avro schemas and vice + * versa. + * + * NOTE: This code is borrowed from Spark 3.2.1 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +@DeveloperApi +private[sql] object SchemaConverters { + private lazy val nullSchema = Schema.create(Schema.Type.NULL) + + /** + * Internal wrapper for SQL data type and nullability. + * + * @since 2.4.0 + */ + case class SchemaType(dataType: DataType, nullable: Boolean) + + /** + * Converts an Avro schema to a corresponding Spark SQL schema. + * + * @since 2.4.0 + */ + def toSqlType(avroSchema: Schema): SchemaType = { + toSqlTypeHelper(avroSchema, Set.empty) + } + + private def toSqlTypeHelper(avroSchema: Schema, existingRecordNames: Set[String]): SchemaType = { + avroSchema.getType match { + case INT => avroSchema.getLogicalType match { + case _: Date => SchemaType(DateType, nullable = false) + case _ => SchemaType(IntegerType, nullable = false) + } + case STRING => SchemaType(StringType, nullable = false) + case BOOLEAN => SchemaType(BooleanType, nullable = false) + case BYTES | FIXED => avroSchema.getLogicalType match { + // For FIXED type, if the precision requires more bytes than fixed size, the logical + // type will be null, which is handled by Avro library. 
+ case d: Decimal => SchemaType(DecimalType(d.getPrecision, d.getScale), nullable = false) + case _ => SchemaType(BinaryType, nullable = false) + } + + case DOUBLE => SchemaType(DoubleType, nullable = false) + case FLOAT => SchemaType(FloatType, nullable = false) + case LONG => avroSchema.getLogicalType match { + case _: TimestampMillis | _: TimestampMicros => SchemaType(TimestampType, nullable = false) + case _ => SchemaType(LongType, nullable = false) + } + + case ENUM => SchemaType(StringType, nullable = false) + + case NULL => SchemaType(NullType, nullable = true) + + case RECORD => + if (existingRecordNames.contains(avroSchema.getFullName)) { + throw new IncompatibleSchemaException( + s""" + |Found recursive reference in Avro schema, which can not be processed by Spark: + |${avroSchema.toString(true)} + """.stripMargin) + } + val newRecordNames = existingRecordNames + avroSchema.getFullName + val fields = avroSchema.getFields.asScala.map { f => + val schemaType = toSqlTypeHelper(f.schema(), newRecordNames) + StructField(f.name, schemaType.dataType, schemaType.nullable) + } + + SchemaType(StructType(fields.toSeq), nullable = false) + + case ARRAY => + val schemaType = toSqlTypeHelper(avroSchema.getElementType, existingRecordNames) + SchemaType( + ArrayType(schemaType.dataType, containsNull = schemaType.nullable), + nullable = false) + + case MAP => + val schemaType = toSqlTypeHelper(avroSchema.getValueType, existingRecordNames) + SchemaType( + MapType(StringType, schemaType.dataType, valueContainsNull = schemaType.nullable), + nullable = false) + + case UNION => + if (avroSchema.getTypes.asScala.exists(_.getType == NULL)) { + // In case of a union with null, eliminate it and make a recursive call + val remainingUnionTypes = avroSchema.getTypes.asScala.filterNot(_.getType == NULL) + if (remainingUnionTypes.size == 1) { + toSqlTypeHelper(remainingUnionTypes.head, existingRecordNames).copy(nullable = true) + } else { + 
toSqlTypeHelper(Schema.createUnion(remainingUnionTypes.asJava), existingRecordNames) + .copy(nullable = true) + } + } else avroSchema.getTypes.asScala.map(_.getType).toSeq match { + case Seq(t1) => + toSqlTypeHelper(avroSchema.getTypes.get(0), existingRecordNames) + case Seq(t1, t2) if Set(t1, t2) == Set(INT, LONG) => + SchemaType(LongType, nullable = false) + case Seq(t1, t2) if Set(t1, t2) == Set(FLOAT, DOUBLE) => + SchemaType(DoubleType, nullable = false) + case _ => + // Convert complex unions to struct types where field names are member0, member1, etc. + // This is consistent with the behavior when converting between Avro and Parquet. + val fields = avroSchema.getTypes.asScala.zipWithIndex.map { + case (s, i) => + val schemaType = toSqlTypeHelper(s, existingRecordNames) + // All fields are nullable because only one of them is set at a time + StructField(s"member$i", schemaType.dataType, nullable = true) + } + + SchemaType(StructType(fields.toSeq), nullable = false) + } + + case other => throw new IncompatibleSchemaException(s"Unsupported type $other") + } + } + + /** + * Converts a Spark SQL schema to a corresponding Avro schema. 
+ * + * @since 2.4.0 + */ + def toAvroType(catalystType: DataType, + nullable: Boolean = false, + recordName: String = "topLevelRecord", + nameSpace: String = ""): Schema = { + val builder = SchemaBuilder.builder() + + val schema = catalystType match { + case BooleanType => builder.booleanType() + case ByteType | ShortType | IntegerType => builder.intType() + case LongType => builder.longType() + case DateType => + LogicalTypes.date().addToSchema(builder.intType()) + case TimestampType => + LogicalTypes.timestampMicros().addToSchema(builder.longType()) + + case FloatType => builder.floatType() + case DoubleType => builder.doubleType() + case StringType => builder.stringType() + case NullType => builder.nullType() + case d: DecimalType => + val avroType = LogicalTypes.decimal(d.precision, d.scale) + val fixedSize = minBytesForPrecision(d.precision) + // Need to avoid naming conflict for the fixed fields + val name = nameSpace match { + case "" => s"$recordName.fixed" + case _ => s"$nameSpace.$recordName.fixed" + } + avroType.addToSchema(SchemaBuilder.fixed(name).size(fixedSize)) + + case BinaryType => builder.bytesType() + case ArrayType(et, containsNull) => + builder.array() + .items(toAvroType(et, containsNull, recordName, nameSpace)) + case MapType(StringType, vt, valueContainsNull) => + builder.map() + .values(toAvroType(vt, valueContainsNull, recordName, nameSpace)) + case st: StructType => + val childNameSpace = if (nameSpace != "") s"$nameSpace.$recordName" else recordName + val fieldsAssembler = builder.record(recordName).namespace(nameSpace).fields() + st.foreach { f => + val fieldAvroType = + toAvroType(f.dataType, f.nullable, f.name, childNameSpace) + fieldsAssembler.name(f.name).`type`(fieldAvroType).noDefault() + } + fieldsAssembler.endRecord() + + // This should never happen. 
+ case other => throw new IncompatibleSchemaException(s"Unexpected type $other.") + } + if (nullable && catalystType != NullType) { + Schema.createUnion(schema, nullSchema) + } else { + schema + } + } +} + +private[avro] class IncompatibleSchemaException(msg: String, ex: Throwable = null) extends Exception(msg, ex) + +private[avro] class UnsupportedAvroTypeException(msg: String) extends Exception(msg) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala index 98823d14222d9..7ee8f6ad569b2 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala @@ -110,6 +110,11 @@ class HoodieCatalogTable(val spark: SparkSession, val table: CatalogTable) exten */ lazy val partitionFields: Array[String] = tableConfig.getPartitionFields.orElse(Array.empty) + /** + * BaseFileFormat + */ + lazy val baseFileFormat: String = metaClient.getTableConfig.getBaseFileFormat.name() + /** * The schema of table. * Make StructField nullable and fill the comments in. diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkHoodieParquetFileFormat.scala new file mode 100644 index 0000000000000..150178ea69066 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkHoodieParquetFileFormat.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.hadoop.conf.Configuration +import org.apache.hudi.SparkAdapterSupport +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.datasources.PartitionedFile +import org.apache.spark.sql.sources.Filter +import org.apache.spark.sql.types.StructType + + +class SparkHoodieParquetFileFormat extends ParquetFileFormat with SparkAdapterSupport { + override def shortName(): String = "HoodieParquet" + + override def toString: String = "HoodieParquet" + + override def buildReaderWithPartitionValues( + sparkSession: SparkSession, + dataSchema: StructType, + partitionSchema: StructType, + requiredSchema: StructType, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { + sparkAdapter + .createHoodieParquetFileFormat().get + .buildReaderWithPartitionValues(sparkSession, dataSchema, partitionSchema, requiredSchema, filters, options, hadoopConf) + } +} + diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala 
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala index 06b92e204fbed..4db94e5b23d9a 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala @@ -17,14 +17,17 @@ package org.apache.spark.sql.hudi -import org.apache.hudi.index.columnstats.ColumnStatsIndexHelper.{getMaxColumnNameFor, getMinColumnNameFor, getNumNullsColumnNameFor} +import org.apache.hudi.ColumnStatsIndexSupport.{getMaxColumnNameFor, getMinColumnNameFor, getNullCountColumnNameFor, getValueCountColumnNameFor} +import org.apache.hudi.SparkAdapterSupport +import org.apache.hudi.common.util.ValidationUtils.checkState import org.apache.spark.internal.Logging -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral -import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, EqualNullSafe, EqualTo, Expression, ExtractValue, GetStructField, GreaterThan, GreaterThanOrEqual, In, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not, Or, StartsWith} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, EqualNullSafe, EqualTo, Expression, ExtractValue, GetStructField, GreaterThan, GreaterThanOrEqual, In, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not, Or, StartsWith, SubqueryExpression} import org.apache.spark.sql.functions.col +import org.apache.spark.sql.hudi.ColumnStatsExpressionUtils._ import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{AnalysisException, HoodieCatalystExpressionUtils} import org.apache.spark.unsafe.types.UTF8String object DataSkippingUtils extends Logging { @@ -59,147 +62,205 @@ object DataSkippingUtils extends Logging { } private 
def tryComposeIndexFilterExpr(sourceExpr: Expression, indexSchema: StructType): Option[Expression] = { - def minValue(colName: String) = col(getMinColumnNameFor(colName)).expr - def maxValue(colName: String) = col(getMaxColumnNameFor(colName)).expr - def numNulls(colName: String) = col(getNumNullsColumnNameFor(colName)).expr - - def colContainsValuesEqualToLiteral(colName: String, value: Literal): Expression = - // Only case when column C contains value V is when min(C) <= V <= max(c) - And(LessThanOrEqual(minValue(colName), value), GreaterThanOrEqual(maxValue(colName), value)) - - def colContainsOnlyValuesEqualToLiteral(colName: String, value: Literal) = - // Only case when column C contains _only_ value V is when min(C) = V AND max(c) = V - And(EqualTo(minValue(colName), value), EqualTo(maxValue(colName), value)) - + // + // For translation of the Filter Expression for the Data Table into Filter Expression for Column Stats Index, we're + // assuming that + // - The column A is queried in the Data Table (hereafter referred to as "colA") + // - Filter Expression is a relational expression (ie "=", "<", "<=", ...) 
of the following form + // + // ```transform_expr(colA) = value_expr``` + // + // Where + // - "transform_expr" is an expression of the _transformation_ which preserve ordering of the "colA" + // - "value_expr" is an "value"-expression (ie one NOT referring to other attributes/columns or containing sub-queries) + // + // We translate original Filter Expr into the one querying Column Stats Index like following: let's consider + // equality Filter Expr referred to above: + // + // ```transform_expr(colA) = value_expr``` + // + // This expression will be translated into following Filter Expression for the Column Stats Index: + // + // ```(transform_expr(colA_minValue) <= value_expr) AND (value_expr <= transform_expr(colA_maxValue))``` + // + // Which will enable us to match files with the range of values in column A containing the target ```value_expr``` + // + // NOTE: That we can apply ```transform_expr``` transformation precisely b/c it preserves the ordering of the + // values of the source column, ie following holds true: + // + // colA_minValue = min(colA) => transform_expr(colA_minValue) = min(transform_expr(colA)) + // colA_maxValue = max(colA) => transform_expr(colA_maxValue) = max(transform_expr(colA)) + // sourceExpr match { - // Filter "colA = b" - // Translates to "colA_minValue <= b AND colA_maxValue >= b" condition for index lookup - case EqualTo(attribute: AttributeReference, value: Literal) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => colContainsValuesEqualToLiteral(colName, value)) - - // Filter "b = colA" - // Translates to "colA_minValue <= b AND colA_maxValue >= b" condition for index lookup - case EqualTo(value: Literal, attribute: AttributeReference) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => colContainsValuesEqualToLiteral(colName, value)) - - // Filter "colA != b" - // Translates to "NOT(colA_minValue = b AND colA_maxValue = b)" - // NOTE: This is NOT an inversion of `colA = b` - case 
Not(EqualTo(attribute: AttributeReference, value: Literal)) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => Not(colContainsOnlyValuesEqualToLiteral(colName, value))) - - // Filter "b != colA" - // Translates to "NOT(colA_minValue = b AND colA_maxValue = b)" - // NOTE: This is NOT an inversion of `colA = b` - case Not(EqualTo(value: Literal, attribute: AttributeReference)) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => Not(colContainsOnlyValuesEqualToLiteral(colName, value))) + // If Expression is not resolved, we can't perform the analysis accurately, bailing + case expr if !expr.resolved => None + + // Filter "expr(colA) = B" and "B = expr(colA)" + // Translates to "(expr(colA_minValue) <= B) AND (B <= expr(colA_maxValue))" condition for index lookup + case EqualTo(sourceExpr @ AllowedTransformationExpression(attrRef), valueExpr: Expression) if isValueExpression(valueExpr) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + // NOTE: Since we're supporting (almost) arbitrary expressions of the form `f(colA) = B`, we have to + // appropriately translate such original expression targeted at Data Table, to corresponding + // expression targeted at Column Stats Index Table. 
For that, we take original expression holding + // [[AttributeReference]] referring to the Data Table, and swap it w/ expression referring to + // corresponding column in the Column Stats Index + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + genColumnValuesEqualToExpression(colName, valueExpr, targetExprBuilder) + } + + case EqualTo(valueExpr: Expression, sourceExpr @ AllowedTransformationExpression(attrRef)) if isValueExpression(valueExpr) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + genColumnValuesEqualToExpression(colName, valueExpr, targetExprBuilder) + } + + // Filter "expr(colA) != B" and "B != expr(colA)" + // Translates to "NOT(expr(colA_minValue) = B AND expr(colA_maxValue) = B)" + // NOTE: This is NOT an inversion of `colA = b`, instead this filter ONLY excludes files for which `colA = B` + // holds true + case Not(EqualTo(sourceExpr @ AllowedTransformationExpression(attrRef), value: Expression)) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + Not(genColumnOnlyValuesEqualToExpression(colName, value, targetExprBuilder)) + } + + case Not(EqualTo(value: Expression, sourceExpr @ AllowedTransformationExpression(attrRef))) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + Not(genColumnOnlyValuesEqualToExpression(colName, value, targetExprBuilder)) + } // Filter "colA = null" - // Translates to "colA_num_nulls = null" for index lookup - case equalNullSafe @ EqualNullSafe(_: AttributeReference, _ @ Literal(null, _)) => - getTargetIndexedColName(equalNullSafe.left, indexSchema) - 
.map(colName => EqualTo(numNulls(colName), equalNullSafe.right)) - - // Filter "colA < b" - // Translates to "colA_minValue < b" for index lookup - case LessThan(attribute: AttributeReference, value: Literal) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => LessThan(minValue(colName), value)) - - // Filter "b > colA" - // Translates to "b > colA_minValue" for index lookup - case GreaterThan(value: Literal, attribute: AttributeReference) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => LessThan(minValue(colName), value)) - - // Filter "b < colA" - // Translates to "b < colA_maxValue" for index lookup - case LessThan(value: Literal, attribute: AttributeReference) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => GreaterThan(maxValue(colName), value)) - - // Filter "colA > b" - // Translates to "colA_maxValue > b" for index lookup - case GreaterThan(attribute: AttributeReference, value: Literal) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => GreaterThan(maxValue(colName), value)) - - // Filter "colA <= b" - // Translates to "colA_minValue <= b" for index lookup - case LessThanOrEqual(attribute: AttributeReference, value: Literal) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => LessThanOrEqual(minValue(colName), value)) - - // Filter "b >= colA" - // Translates to "b >= colA_minValue" for index lookup - case GreaterThanOrEqual(value: Literal, attribute: AttributeReference) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => LessThanOrEqual(minValue(colName), value)) - - // Filter "b <= colA" - // Translates to "b <= colA_maxValue" for index lookup - case LessThanOrEqual(value: Literal, attribute: AttributeReference) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => GreaterThanOrEqual(maxValue(colName), value)) - - // Filter "colA >= b" - // Translates to "colA_maxValue >= b" for index lookup - case 
GreaterThanOrEqual(attribute: AttributeReference, right: Literal) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => GreaterThanOrEqual(maxValue(colName), right)) + // Translates to "colA_nullCount = null" for index lookup + case EqualNullSafe(attrRef: AttributeReference, litNull @ Literal(null, _)) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map(colName => EqualTo(genColNumNullsExpr(colName), litNull)) + + // Filter "expr(colA) < B" and "B > expr(colA)" + // Translates to "expr(colA_minValue) < B" for index lookup + case LessThan(sourceExpr @ AllowedTransformationExpression(attrRef), value: Expression) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + LessThan(targetExprBuilder.apply(genColMinValueExpr(colName)), value) + } + + case GreaterThan(value: Expression, sourceExpr @ AllowedTransformationExpression(attrRef)) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + LessThan(targetExprBuilder.apply(genColMinValueExpr(colName)), value) + } + + // Filter "B < expr(colA)" and "expr(colA) > B" + // Translates to "B < colA_maxValue" for index lookup + case LessThan(value: Expression, sourceExpr @ AllowedTransformationExpression(attrRef)) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + GreaterThan(targetExprBuilder.apply(genColMaxValueExpr(colName)), value) + } + + case GreaterThan(sourceExpr @ AllowedTransformationExpression(attrRef), value: Expression) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: 
Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + GreaterThan(targetExprBuilder.apply(genColMaxValueExpr(colName)), value) + } + + // Filter "expr(colA) <= B" and "B >= expr(colA)" + // Translates to "colA_minValue <= B" for index lookup + case LessThanOrEqual(sourceExpr @ AllowedTransformationExpression(attrRef), value: Expression) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + LessThanOrEqual(targetExprBuilder.apply(genColMinValueExpr(colName)), value) + } + + case GreaterThanOrEqual(value: Expression, sourceExpr @ AllowedTransformationExpression(attrRef)) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + LessThanOrEqual(targetExprBuilder.apply(genColMinValueExpr(colName)), value) + } + + // Filter "B <= expr(colA)" and "expr(colA) >= B" + // Translates to "B <= colA_maxValue" for index lookup + case LessThanOrEqual(value: Expression, sourceExpr @ AllowedTransformationExpression(attrRef)) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + GreaterThanOrEqual(targetExprBuilder.apply(genColMaxValueExpr(colName)), value) + } + + case GreaterThanOrEqual(sourceExpr @ AllowedTransformationExpression(attrRef), value: Expression) if isValueExpression(value) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + GreaterThanOrEqual(targetExprBuilder.apply(genColMaxValueExpr(colName)), value) + } // Filter "colA is null" - // Translates to "colA_num_nulls > 0" for index lookup + 
// Translates to "colA_nullCount > 0" for index lookup case IsNull(attribute: AttributeReference) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => GreaterThan(numNulls(colName), Literal(0))) + getTargetIndexedColumnName(attribute, indexSchema) + .map(colName => GreaterThan(genColNumNullsExpr(colName), Literal(0))) // Filter "colA is not null" - // Translates to "colA_num_nulls = 0" for index lookup + // Translates to "colA_nullCount < colA_valueCount" for index lookup case IsNotNull(attribute: AttributeReference) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => EqualTo(numNulls(colName), Literal(0))) - - // Filter "colA in (a, b, ...)" - // Translates to "(colA_minValue <= a AND colA_maxValue >= a) OR (colA_minValue <= b AND colA_maxValue >= b)" for index lookup - // NOTE: This is equivalent to "colA = a OR colA = b OR ..." - case In(attribute: AttributeReference, list: Seq[Literal]) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => - list.map { lit => colContainsValuesEqualToLiteral(colName, lit) }.reduce(Or) - ) - - // Filter "colA not in (a, b, ...)" - // Translates to "NOT((colA_minValue = a AND colA_maxValue = a) OR (colA_minValue = b AND colA_maxValue = b))" for index lookup - // NOTE: This is NOT an inversion of `in (a, b, ...)` expr, this is equivalent to "colA != a AND colA != b AND ..." - case Not(In(attribute: AttributeReference, list: Seq[Literal])) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => - Not( - list.map { lit => colContainsOnlyValuesEqualToLiteral(colName, lit) }.reduce(Or) - ) - ) + getTargetIndexedColumnName(attribute, indexSchema) + .map(colName => LessThan(genColNumNullsExpr(colName), genColValueCountExpr)) + + // Filter "expr(colA) in (B1, B2, ...)" + // Translates to "(colA_minValue <= B1 AND colA_maxValue >= B1) OR (colA_minValue <= B2 AND colA_maxValue >= B2) ... 
" + // for index lookup + // NOTE: This is equivalent to "colA = B1 OR colA = B2 OR ..." + case In(sourceExpr @ AllowedTransformationExpression(attrRef), list: Seq[Expression]) if list.forall(isValueExpression) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + list.map(lit => genColumnValuesEqualToExpression(colName, lit, targetExprBuilder)).reduce(Or) + } + + // Filter "expr(colA) not in (B1, B2, ...)" + // Translates to "NOT((colA_minValue = B1 AND colA_maxValue = B1) OR (colA_minValue = B2 AND colA_maxValue = B2))" for index lookup + // NOTE: This is NOT an inversion of `in (B1, B2, ...)` expr, this is equivalent to "colA != B1 AND colA != B2 AND ..." + case Not(In(sourceExpr @ AllowedTransformationExpression(attrRef), list: Seq[Expression])) if list.forall(_.foldable) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + Not(list.map(lit => genColumnOnlyValuesEqualToExpression(colName, lit, targetExprBuilder)).reduce(Or)) + } // Filter "colA like 'xxx%'" - // Translates to "colA_minValue <= xxx AND colA_maxValue >= xxx" for index lookup - // NOTE: That this operator only matches string prefixes, and this is - // essentially equivalent to "colA = b" expression - case StartsWith(attribute, v @ Literal(_: UTF8String, _)) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => colContainsValuesEqualToLiteral(colName, v)) - - // Filter "colA not like 'xxx%'" - // Translates to "NOT(colA_minValue like 'xxx%' AND colA_maxValue like 'xxx%')" for index lookup + // Translates to "colA_minValue <= xxx AND xxx <= colA_maxValue" for index lookup + // + // NOTE: Since a) this operator matches strings by prefix and b) given that this column is going to be ordered + // lexicographically, we essentially need to check that 
provided literal falls w/in min/max bounds of the + // given column + case StartsWith(sourceExpr @ AllowedTransformationExpression(attrRef), v @ Literal(_: UTF8String, _)) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + genColumnValuesEqualToExpression(colName, v, targetExprBuilder) + } + + // Filter "expr(colA) not like 'xxx%'" + // Translates to "NOT(expr(colA_minValue) like 'xxx%' AND expr(colA_maxValue) like 'xxx%')" for index lookup // NOTE: This is NOT an inversion of "colA like xxx" - case Not(StartsWith(attribute, value @ Literal(_: UTF8String, _))) => - getTargetIndexedColName(attribute, indexSchema) - .map(colName => - Not(And(StartsWith(minValue(colName), value), StartsWith(maxValue(colName), value))) - ) + case Not(StartsWith(sourceExpr @ AllowedTransformationExpression(attrRef), value @ Literal(_: UTF8String, _))) => + getTargetIndexedColumnName(attrRef, indexSchema) + .map { colName => + val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _) + val minValueExpr = targetExprBuilder.apply(genColMinValueExpr(colName)) + val maxValueExpr = targetExprBuilder.apply(genColMaxValueExpr(colName)) + Not(And(StartsWith(minValueExpr, value), StartsWith(maxValueExpr, value))) + } case or: Or => val resLeft = createColumnStatsIndexFilterExprInternal(or.left, indexSchema) @@ -233,12 +294,12 @@ object DataSkippingUtils extends Logging { Set.apply( getMinColumnNameFor(colName), getMaxColumnNameFor(colName), - getNumNullsColumnNameFor(colName) + getNullCountColumnNameFor(colName) ) .forall(stat => indexSchema.exists(_.name == stat)) } - private def getTargetIndexedColName(resolvedExpr: Expression, indexSchema: StructType): Option[String] = { + private def getTargetIndexedColumnName(resolvedExpr: AttributeReference, indexSchema: StructType): Option[String] = { val colName = 
UnresolvedAttribute(getTargetColNameParts(resolvedExpr)).name // Verify that the column is indexed @@ -261,3 +322,83 @@ object DataSkippingUtils extends Logging { } } } + +private object ColumnStatsExpressionUtils { + + @inline def genColMinValueExpr(colName: String): Expression = col(getMinColumnNameFor(colName)).expr + @inline def genColMaxValueExpr(colName: String): Expression = col(getMaxColumnNameFor(colName)).expr + @inline def genColNumNullsExpr(colName: String): Expression = col(getNullCountColumnNameFor(colName)).expr + @inline def genColValueCountExpr: Expression = col(getValueCountColumnNameFor).expr + + @inline def genColumnValuesEqualToExpression(colName: String, + value: Expression, + targetExprBuilder: Function[Expression, Expression] = Predef.identity): Expression = { + val minValueExpr = targetExprBuilder.apply(genColMinValueExpr(colName)) + val maxValueExpr = targetExprBuilder.apply(genColMaxValueExpr(colName)) + // Only case when column C contains value V is when min(C) <= V <= max(c) + And(LessThanOrEqual(minValueExpr, value), GreaterThanOrEqual(maxValueExpr, value)) + } + + def genColumnOnlyValuesEqualToExpression(colName: String, + value: Expression, + targetExprBuilder: Function[Expression, Expression] = Predef.identity): Expression = { + val minValueExpr = targetExprBuilder.apply(genColMinValueExpr(colName)) + val maxValueExpr = targetExprBuilder.apply(genColMaxValueExpr(colName)) + // Only case when column C contains _only_ value V is when min(C) = V AND max(c) = V + And(EqualTo(minValueExpr, value), EqualTo(maxValueExpr, value)) + } + + def swapAttributeRefInExpr(sourceExpr: Expression, from: AttributeReference, to: Expression): Expression = { + checkState(sourceExpr.references.size == 1) + sourceExpr.transformDown { + case attrRef: AttributeReference if attrRef.sameRef(from) => to + } + } + + /** + * This check is used to validate that the expression that target column is compared against + *
    +   *    a) Has no references to other attributes (for ex, columns)
    +   *    b) Does not contain sub-queries
    +   * 
    + * + * This in turn allows us to be certain that Spark will be able to evaluate such expression + * against Column Stats Index as well + */ + def isValueExpression(expr: Expression): Boolean = + expr.references.isEmpty && !SubqueryExpression.hasSubquery(expr) + + /** + * This utility pattern-matches an expression iff + * + *
      + *
    1. It references *exactly* 1 attribute (column)
    2. + *
    3. It does NOT contain sub-queries
    4. + *
    5. It contains only whitelisted transformations that preserve ordering of the source column [1]
    6. + *
    + * + * [1] This is required to make sure that we can correspondingly map Column Stats Index values as well. Applying + * transformations that do not preserve the ordering might lead to incorrect results being returned by Data + * Skipping flow. + * + * Returns only [[AttributeReference]] contained as a sub-expression + */ + object AllowedTransformationExpression extends SparkAdapterSupport { + val exprUtils: HoodieCatalystExpressionUtils = sparkAdapter.createCatalystExpressionUtils() + + def unapply(expr: Expression): Option[AttributeReference] = { + // First step, we check that expression + // - Does NOT contain sub-queries + // - Does contain exactly 1 attribute + if (SubqueryExpression.hasSubquery(expr) || expr.references.size != 1) { + None + } else { + // Second step, we validate that holding expression is an actually permitted + // transformation + // NOTE: That transformation composition is permitted + exprUtils.tryMatchAttributeOrderingPreservingTransformation(expr) + } + } + } +} + diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala index 1e1e9c663e54f..fcdbacea51e43 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala @@ -19,30 +19,28 @@ package org.apache.spark.sql.hudi import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path - import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.config.{DFSPropertiesConfiguration, HoodieMetadataConfig} import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstantTimeGenerator} import 
org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hudi.common.util.PartitionPathEncodeUtils import org.apache.hudi.{AvroConversionUtils, SparkAdapterSupport} - import org.apache.spark.api.java.JavaSparkContext import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedRelation} -import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, HoodieCatalogTable} import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Cast, Expression, Literal} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.types.{DataType, NullType, StringType, StructField, StructType} -import org.apache.spark.sql.{Column, DataFrame, SparkSession} +import org.apache.spark.sql.{AnalysisException, Column, DataFrame, SparkSession} import java.net.URI import java.text.SimpleDateFormat import java.util.{Locale, Properties} - import scala.collection.JavaConverters._ import scala.collection.immutable.Map @@ -321,4 +319,57 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport { Cast(child, dataType, Option(conf.sessionLocalTimeZone)) else child } } + + def normalizePartitionSpec[T]( + partitionSpec: Map[String, T], + partColNames: Seq[String], + tblName: String, + resolver: Resolver): Map[String, T] = { + val normalizedPartSpec = partitionSpec.toSeq.map { case (key, value) => + val normalizedKey = partColNames.find(resolver(_, key)).getOrElse { + throw new AnalysisException(s"$key is not a valid partition column in table $tblName.") + } + normalizedKey -> value + } + + if (normalizedPartSpec.size < partColNames.size) { + throw new AnalysisException( + "All partition columns need to be specified for Hoodie's 
partition") + } + + val lowerPartColNames = partColNames.map(_.toLowerCase) + if (lowerPartColNames.distinct.length != lowerPartColNames.length) { + val duplicateColumns = lowerPartColNames.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => s"`$x`" + } + throw new AnalysisException( + s"Found duplicate column(s) in the partition schema: ${duplicateColumns.mkString(", ")}") + } + + normalizedPartSpec.toMap + } + + def getPartitionPathToDrop( + hoodieCatalogTable: HoodieCatalogTable, + normalizedSpecs: Seq[Map[String, String]]): String = { + val table = hoodieCatalogTable.table + val allPartitionPaths = hoodieCatalogTable.getPartitionPaths + val enableHiveStylePartitioning = isHiveStyledPartitioning(allPartitionPaths, table) + val enableEncodeUrl = isUrlEncodeEnabled(allPartitionPaths, table) + val partitionsToDrop = normalizedSpecs.map { spec => + hoodieCatalogTable.partitionFields.map { partitionColumn => + val encodedPartitionValue = if (enableEncodeUrl) { + PartitionPathEncodeUtils.escapePathName(spec(partitionColumn)) + } else { + spec(partitionColumn) + } + if (enableHiveStylePartitioning) { + partitionColumn + "=" + encodedPartitionValue + } else { + encodedPartitionValue + } + }.mkString("/") + }.mkString(",") + partitionsToDrop + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala index d6745b6795032..31fb0ad6cb0cf 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala @@ -17,22 +17,31 @@ package org.apache.spark.sql.hudi -import org.apache.hudi.DataSourceWriteOptions import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.common.config.TypedProperties import 
org.apache.hudi.common.model.OverwriteWithLatestAvroPayload -import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME -import org.apache.hudi.hive.MultiPartKeysValueExtractor +import org.apache.hudi.config.{HoodieIndexConfig, HoodieWriteConfig} import org.apache.hudi.hive.ddl.HiveSyncMode +import org.apache.hudi.hive.{HiveSyncConfig, MultiPartKeysValueExtractor} import org.apache.hudi.keygen.ComplexKeyGenerator import org.apache.hudi.sql.InsertMode +import org.apache.hudi.sync.common.HoodieSyncConfig +import org.apache.hudi.{DataSourceWriteOptions, HoodieWriterUtils} import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable +import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{isEnableHive, withSparkConf} import org.apache.spark.sql.hudi.command.{SqlKeyGenerator, ValidateDuplicateKeyPayload} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StructType -import scala.collection.JavaConverters.propertiesAsScalaMapConverter +import java.util +import java.util.Locale + +import scala.collection.JavaConverters._ trait ProvidesHoodieConfig extends Logging { @@ -40,7 +49,6 @@ trait ProvidesHoodieConfig extends Logging { val sparkSession: SparkSession = hoodieCatalogTable.spark val catalogProperties = hoodieCatalogTable.catalogProperties val tableConfig = hoodieCatalogTable.tableConfig - val tableId = hoodieCatalogTable.table.identifier // NOTE: Here we fallback to "" to make sure that null value is not overridden with // default value ("ts") @@ -51,6 +59,10 @@ trait ProvidesHoodieConfig extends Logging { s"There are no primary key in table ${hoodieCatalogTable.table.identifier}, cannot execute update operator") val enableHive = isEnableHive(sparkSession) + val hoodieProps = 
getHoodieProps(catalogProperties, tableConfig, sparkSession.sqlContext.conf) + + val hiveSyncConfig = buildHiveSyncConfig(hoodieProps, hoodieCatalogTable) + withSparkConf(sparkSession, catalogProperties) { Map.apply( "path" -> hoodieCatalogTable.tableLocation, @@ -63,15 +75,14 @@ trait ProvidesHoodieConfig extends Logging { SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName, OPERATION.key -> UPSERT_OPERATION_OPT_VAL, PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp, - META_SYNC_ENABLED.key -> enableHive.toString, - HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(), - HIVE_USE_JDBC.key -> "false", - HIVE_DATABASE.key -> tableId.database.getOrElse("default"), - HIVE_TABLE.key -> tableId.table, - HIVE_PARTITION_FIELDS.key -> tableConfig.getPartitionFieldProp, - HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, - HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true", - HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200", + HoodieSyncConfig.META_SYNC_ENABLED.key -> enableHive.toString, + HiveSyncConfig.HIVE_SYNC_MODE.key -> hiveSyncConfig.syncMode, + HoodieSyncConfig.META_SYNC_DATABASE_NAME.key -> hiveSyncConfig.databaseName, + HoodieSyncConfig.META_SYNC_TABLE_NAME.key -> hiveSyncConfig.tableName, + HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key -> tableConfig.getPartitionFieldProp, + HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key -> hiveSyncConfig.partitionValueExtractorClass, + HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> hiveSyncConfig.supportTimestamp.toString, + HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "200"), SqlKeyGenerator.PARTITION_SCHEMA -> hoodieCatalogTable.partitionSchema.toDDL ) .filter { case(_, v) => v != null } @@ -98,10 +109,12 @@ trait ProvidesHoodieConfig extends Logging { val path = hoodieCatalogTable.tableLocation val tableType = hoodieCatalogTable.tableTypeName val tableConfig = 
hoodieCatalogTable.tableConfig - val tableSchema = hoodieCatalogTable.tableSchema + val catalogProperties = hoodieCatalogTable.catalogProperties + + val hoodieProps = getHoodieProps(catalogProperties, tableConfig, sparkSession.sqlContext.conf, extraOptions) + val hiveSyncConfig = buildHiveSyncConfig(hoodieProps, hoodieCatalogTable) - val options = hoodieCatalogTable.catalogProperties ++ tableConfig.getProps.asScala.toMap ++ extraOptions - val parameters = withSparkConf(sparkSession, options)() + val parameters = withSparkConf(sparkSession, catalogProperties)() val partitionFieldsStr = hoodieCatalogTable.partitionFields.mkString(",") @@ -161,7 +174,7 @@ trait ProvidesHoodieConfig extends Logging { val enableHive = isEnableHive(sparkSession) - withSparkConf(sparkSession, options) { + withSparkConf(sparkSession, catalogProperties) { Map( "path" -> path, TABLE_TYPE.key -> tableType, @@ -177,20 +190,127 @@ trait ProvidesHoodieConfig extends Logging { PAYLOAD_CLASS_NAME.key -> payloadClassName, ENABLE_ROW_WRITER.key -> enableBulkInsert.toString, HoodieWriteConfig.COMBINE_BEFORE_INSERT.key -> String.valueOf(hasPrecombineColumn), - HIVE_PARTITION_FIELDS.key -> partitionFieldsStr, - META_SYNC_ENABLED.key -> enableHive.toString, - HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(), - HIVE_USE_JDBC.key -> "false", - HIVE_DATABASE.key -> hoodieCatalogTable.table.identifier.database.getOrElse("default"), - HIVE_TABLE.key -> hoodieCatalogTable.table.identifier.table, - HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true", - HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, - HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key -> "200", - HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200", + HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key -> partitionFieldsStr, + HoodieSyncConfig.META_SYNC_ENABLED.key -> enableHive.toString, + HiveSyncConfig.HIVE_SYNC_MODE.key -> hiveSyncConfig.syncMode, + HoodieSyncConfig.META_SYNC_DATABASE_NAME.key -> 
hiveSyncConfig.databaseName, + HoodieSyncConfig.META_SYNC_TABLE_NAME.key -> hiveSyncConfig.tableName, + HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> hiveSyncConfig.supportTimestamp.toString, + HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key -> hiveSyncConfig.partitionValueExtractorClass, + HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key, "200"), + HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "200"), SqlKeyGenerator.PARTITION_SCHEMA -> hoodieCatalogTable.partitionSchema.toDDL ) .filter { case (_, v) => v != null } } } + def buildHoodieDropPartitionsConfig( + sparkSession: SparkSession, + hoodieCatalogTable: HoodieCatalogTable, + partitionsToDrop: String): Map[String, String] = { + val partitionFields = hoodieCatalogTable.partitionFields.mkString(",") + val enableHive = isEnableHive(sparkSession) + val catalogProperties = hoodieCatalogTable.catalogProperties + val tableConfig = hoodieCatalogTable.tableConfig + + val hoodieProps = getHoodieProps(catalogProperties, tableConfig, sparkSession.sqlContext.conf) + val hiveSyncConfig = buildHiveSyncConfig(hoodieProps, hoodieCatalogTable) + + withSparkConf(sparkSession, catalogProperties) { + Map( + "path" -> hoodieCatalogTable.tableLocation, + TBL_NAME.key -> hoodieCatalogTable.tableName, + TABLE_TYPE.key -> hoodieCatalogTable.tableTypeName, + OPERATION.key -> DataSourceWriteOptions.DELETE_PARTITION_OPERATION_OPT_VAL, + PARTITIONS_TO_DELETE.key -> partitionsToDrop, + RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","), + PRECOMBINE_FIELD.key -> hoodieCatalogTable.preCombineKey.getOrElse(""), + PARTITIONPATH_FIELD.key -> partitionFields, + HoodieSyncConfig.META_SYNC_ENABLED.key -> enableHive.toString, + HiveSyncConfig.HIVE_SYNC_ENABLED.key -> enableHive.toString, + HiveSyncConfig.HIVE_SYNC_MODE.key -> hiveSyncConfig.syncMode, + 
HoodieSyncConfig.META_SYNC_DATABASE_NAME.key -> hiveSyncConfig.databaseName, + HoodieSyncConfig.META_SYNC_TABLE_NAME.key -> hiveSyncConfig.tableName, + HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> hiveSyncConfig.supportTimestamp.toString, + HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key -> partitionFields, + HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key -> hiveSyncConfig.partitionValueExtractorClass + ) + .filter { case (_, v) => v != null } + } + } + + def buildHoodieDeleteTableConfig(hoodieCatalogTable: HoodieCatalogTable, + sparkSession: SparkSession): Map[String, String] = { + val path = hoodieCatalogTable.tableLocation + val catalogProperties = hoodieCatalogTable.catalogProperties + val tableConfig = hoodieCatalogTable.tableConfig + val tableSchema = hoodieCatalogTable.tableSchema + val partitionColumns = tableConfig.getPartitionFieldProp.split(",").map(_.toLowerCase(Locale.ROOT)) + val partitionSchema = StructType(tableSchema.filter(f => partitionColumns.contains(f.name))) + + assert(hoodieCatalogTable.primaryKeys.nonEmpty, + s"There are no primary key defined in table ${hoodieCatalogTable.table.identifier}, cannot execute delete operation") + + val hoodieProps = getHoodieProps(catalogProperties, tableConfig, sparkSession.sqlContext.conf) + val hiveSyncConfig = buildHiveSyncConfig(hoodieProps, hoodieCatalogTable) + + // operation can not be overwrite + val options = hoodieCatalogTable.catalogProperties.-(OPERATION.key()) + + withSparkConf(sparkSession, options) { + Map( + "path" -> path, + RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","), + TBL_NAME.key -> tableConfig.getTableName, + HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable, + URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitioning, + KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName, + SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName, + OPERATION.key -> 
DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL, + PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp, + HiveSyncConfig.HIVE_SYNC_MODE.key -> hiveSyncConfig.syncMode, + HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> hiveSyncConfig.supportTimestamp.toString, + HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key, "200"), + SqlKeyGenerator.PARTITION_SCHEMA -> partitionSchema.toDDL + ) + } + } + + def getHoodieProps(catalogProperties: Map[String, String], tableConfig: HoodieTableConfig, conf: SQLConf, extraOptions: Map[String, String] = Map.empty): TypedProperties = { + val options: Map[String, String] = catalogProperties ++ tableConfig.getProps.asScala.toMap ++ conf.getAllConfs ++ extraOptions + val hoodieConfig = HoodieWriterUtils.convertMapToHoodieConfig(options) + hoodieConfig.getProps + } + + def buildHiveSyncConfig(props: TypedProperties, hoodieCatalogTable: HoodieCatalogTable): HiveSyncConfig = { + val hiveSyncConfig: HiveSyncConfig = new HiveSyncConfig + hiveSyncConfig.basePath = hoodieCatalogTable.tableLocation + hiveSyncConfig.baseFileFormat = hoodieCatalogTable.baseFileFormat + hiveSyncConfig.usePreApacheInputFormat = props.getBoolean(HiveSyncConfig.HIVE_USE_PRE_APACHE_INPUT_FORMAT.key, HiveSyncConfig.HIVE_USE_PRE_APACHE_INPUT_FORMAT.defaultValue.toBoolean) + hiveSyncConfig.databaseName = hoodieCatalogTable.table.identifier.database.getOrElse("default") + if (props.containsKey(HoodieSyncConfig.META_SYNC_TABLE_NAME.key)) { + hiveSyncConfig.tableName = props.getString(HoodieSyncConfig.META_SYNC_TABLE_NAME.key) + } else { + hiveSyncConfig.tableName = hoodieCatalogTable.table.identifier.table + } + hiveSyncConfig.syncMode = props.getString(HiveSyncConfig.HIVE_SYNC_MODE.key, HiveSyncMode.HMS.name()) + hiveSyncConfig.hiveUser = props.getString(HiveSyncConfig.HIVE_USER.key, HiveSyncConfig.HIVE_USER.defaultValue) + hiveSyncConfig.hivePass = props.getString(HiveSyncConfig.HIVE_PASS.key, 
HiveSyncConfig.HIVE_PASS.defaultValue) + hiveSyncConfig.jdbcUrl = props.getString(HiveSyncConfig.HIVE_URL.key, HiveSyncConfig.HIVE_URL.defaultValue) + hiveSyncConfig.metastoreUris = props.getString(HiveSyncConfig.METASTORE_URIS.key, HiveSyncConfig.METASTORE_URIS.defaultValue) + hiveSyncConfig.partitionFields = props.getStringList(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key, ",", new util.ArrayList[String]) + hiveSyncConfig.partitionValueExtractorClass = props.getString(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key, classOf[MultiPartKeysValueExtractor].getName) + if (props.containsKey(HiveSyncConfig.HIVE_SYNC_MODE.key)) hiveSyncConfig.syncMode = props.getString(HiveSyncConfig.HIVE_SYNC_MODE.key) + hiveSyncConfig.autoCreateDatabase = props.getString(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key, HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.defaultValue).toBoolean + hiveSyncConfig.ignoreExceptions = props.getString(HiveSyncConfig.HIVE_IGNORE_EXCEPTIONS.key, HiveSyncConfig.HIVE_IGNORE_EXCEPTIONS.defaultValue).toBoolean + hiveSyncConfig.skipROSuffix = props.getString(HiveSyncConfig.HIVE_SKIP_RO_SUFFIX_FOR_READ_OPTIMIZED_TABLE.key, HiveSyncConfig.HIVE_SKIP_RO_SUFFIX_FOR_READ_OPTIMIZED_TABLE.defaultValue).toBoolean + hiveSyncConfig.supportTimestamp = props.getString(HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key, "true").toBoolean + hiveSyncConfig.isConditionalSync = props.getString(HoodieSyncConfig.META_SYNC_CONDITIONAL_SYNC.key, HoodieSyncConfig.META_SYNC_CONDITIONAL_SYNC.defaultValue).toBoolean + hiveSyncConfig.bucketSpec = if (props.getBoolean(HiveSyncConfig.HIVE_SYNC_BUCKET_SYNC.key, HiveSyncConfig.HIVE_SYNC_BUCKET_SYNC.defaultValue)) HiveSyncConfig.getBucketSpec(props.getString(HoodieIndexConfig.BUCKET_INDEX_HASH_FIELD.key), props.getInteger(HoodieIndexConfig.BUCKET_INDEX_NUM_BUCKETS.key)) + else null + if (props.containsKey(HiveExternalCatalog.CREATED_SPARK_VERSION)) hiveSyncConfig.sparkVersion = props.getString(HiveExternalCatalog.CREATED_SPARK_VERSION) 
+ hiveSyncConfig.syncComment = props.getString(DataSourceWriteOptions.HIVE_SYNC_COMMENT.key, DataSourceWriteOptions.HIVE_SYNC_COMMENT.defaultValue).toBoolean + hiveSyncConfig + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableAddColumnsCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableAddColumnsCommand.scala index c4f5cd39f6073..1d65670f6d3f3 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableAddColumnsCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableAddColumnsCommand.scala @@ -110,7 +110,7 @@ object AlterHoodieTableAddColumnsCommand { HoodieWriterUtils.parametersWithWriteDefaults(hoodieCatalogTable.catalogProperties).asJava ) - val commitActionType = CommitUtils.getCommitActionType(WriteOperationType.INSERT, hoodieCatalogTable.tableType) + val commitActionType = CommitUtils.getCommitActionType(WriteOperationType.ALTER_SCHEMA, hoodieCatalogTable.tableType) val instantTime = HoodieActiveTimeline.createNewInstantTime client.startCommitWithTime(instantTime, commitActionType) @@ -118,7 +118,7 @@ object AlterHoodieTableAddColumnsCommand { val timeLine = hoodieTable.getActiveTimeline val requested = new HoodieInstant(State.REQUESTED, commitActionType, instantTime) val metadata = new HoodieCommitMetadata - metadata.setOperationType(WriteOperationType.INSERT) + metadata.setOperationType(WriteOperationType.ALTER_SCHEMA) timeLine.transitionRequestedToInflight(requested, Option.of(metadata.toJsonString.getBytes(StandardCharsets.UTF_8))) client.commit(instantTime, jsc.emptyRDD) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableDropPartitionCommand.scala 
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableDropPartitionCommand.scala index 2e639d78e1e17..c7afbfe11f998 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableDropPartitionCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableDropPartitionCommand.scala @@ -17,21 +17,15 @@ package org.apache.spark.sql.hudi.command -import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.HoodieSparkSqlWriter import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.fs.FSUtils -import org.apache.hudi.common.util.PartitionPathEncodeUtils -import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME -import org.apache.hudi.hive.{HiveSyncConfig, MultiPartKeysValueExtractor} -import org.apache.hudi.hive.ddl.HiveSyncMode -import org.apache.hudi.sync.common.HoodieSyncConfig -import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkSqlWriter} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.hudi.HoodieSqlCommonUtils._ +import org.apache.spark.sql.hudi.ProvidesHoodieConfig import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession} case class AlterHoodieTableDropPartitionCommand( @@ -40,7 +34,7 @@ case class AlterHoodieTableDropPartitionCommand( ifExists : Boolean, purge : Boolean, retainData : Boolean) - extends HoodieLeafRunnableCommand { + extends HoodieLeafRunnableCommand with ProvidesHoodieConfig { override def run(sparkSession: SparkSession): Seq[Row] = { val fullTableName = s"${tableIdentifier.database}.${tableIdentifier.table}" @@ -64,7 +58,7 @@ 
case class AlterHoodieTableDropPartitionCommand( } val partitionsToDrop = getPartitionPathToDrop(hoodieCatalogTable, normalizedSpecs) - val parameters = buildHoodieConfig(sparkSession, hoodieCatalogTable, partitionsToDrop) + val parameters = buildHoodieDropPartitionsConfig(sparkSession, hoodieCatalogTable, partitionsToDrop) HoodieSparkSqlWriter.write( sparkSession.sqlContext, SaveMode.Append, @@ -86,86 +80,4 @@ case class AlterHoodieTableDropPartitionCommand( logInfo(s"Finish execute alter table drop partition command for $fullTableName") Seq.empty[Row] } - - private def buildHoodieConfig( - sparkSession: SparkSession, - hoodieCatalogTable: HoodieCatalogTable, - partitionsToDrop: String): Map[String, String] = { - val partitionFields = hoodieCatalogTable.partitionFields.mkString(",") - val enableHive = isEnableHive(sparkSession) - withSparkConf(sparkSession, Map.empty) { - Map( - "path" -> hoodieCatalogTable.tableLocation, - TBL_NAME.key -> hoodieCatalogTable.tableName, - TABLE_TYPE.key -> hoodieCatalogTable.tableTypeName, - OPERATION.key -> DataSourceWriteOptions.DELETE_PARTITION_OPERATION_OPT_VAL, - PARTITIONS_TO_DELETE.key -> partitionsToDrop, - RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","), - PRECOMBINE_FIELD.key -> hoodieCatalogTable.preCombineKey.getOrElse(""), - PARTITIONPATH_FIELD.key -> partitionFields, - HoodieSyncConfig.META_SYNC_ENABLED.key -> enableHive.toString, - HiveSyncConfig.HIVE_SYNC_ENABLED.key -> enableHive.toString, - HiveSyncConfig.HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(), - HiveSyncConfig.HIVE_USE_JDBC.key -> "false", - HoodieSyncConfig.META_SYNC_DATABASE_NAME.key -> hoodieCatalogTable.table.identifier.database.getOrElse("default"), - HoodieSyncConfig.META_SYNC_TABLE_NAME.key -> hoodieCatalogTable.table.identifier.table, - HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true", - HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key -> partitionFields, - HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key -> 
classOf[MultiPartKeysValueExtractor].getCanonicalName - ) - } - } - - def normalizePartitionSpec[T]( - partitionSpec: Map[String, T], - partColNames: Seq[String], - tblName: String, - resolver: Resolver): Map[String, T] = { - val normalizedPartSpec = partitionSpec.toSeq.map { case (key, value) => - val normalizedKey = partColNames.find(resolver(_, key)).getOrElse { - throw new AnalysisException(s"$key is not a valid partition column in table $tblName.") - } - normalizedKey -> value - } - - if (normalizedPartSpec.size < partColNames.size) { - throw new AnalysisException( - "All partition columns need to be specified for Hoodie's dropping partition") - } - - val lowerPartColNames = partColNames.map(_.toLowerCase) - if (lowerPartColNames.distinct.length != lowerPartColNames.length) { - val duplicateColumns = lowerPartColNames.groupBy(identity).collect { - case (x, ys) if ys.length > 1 => s"`$x`" - } - throw new AnalysisException( - s"Found duplicate column(s) in the partition schema: ${duplicateColumns.mkString(", ")}") - } - - normalizedPartSpec.toMap - } - - def getPartitionPathToDrop( - hoodieCatalogTable: HoodieCatalogTable, - normalizedSpecs: Seq[Map[String, String]]): String = { - val table = hoodieCatalogTable.table - val allPartitionPaths = hoodieCatalogTable.getPartitionPaths - val enableHiveStylePartitioning = isHiveStyledPartitioning(allPartitionPaths, table) - val enableEncodeUrl = isUrlEncodeEnabled(allPartitionPaths, table) - val partitionsToDrop = normalizedSpecs.map { spec => - hoodieCatalogTable.partitionFields.map { partitionColumn => - val encodedPartitionValue = if (enableEncodeUrl) { - PartitionPathEncodeUtils.escapePathName(spec(partitionColumn)) - } else { - spec(partitionColumn) - } - if (enableHiveStylePartitioning) { - partitionColumn + "=" + encodedPartitionValue - } else { - encodedPartitionValue - } - }.mkString("/") - }.mkString(",") - partitionsToDrop - } } diff --git 
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala index da9fcb8d45de0..195bf4153c998 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala @@ -18,19 +18,18 @@ package org.apache.spark.sql.hudi.command import org.apache.hadoop.fs.Path - -import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport} import org.apache.hudi.common.model.HoodieFileFormat import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.hadoop.HoodieParquetInputFormat import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, TableAlreadyExistsException} +import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport} +import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.hive.HiveClientUtils import org.apache.spark.sql.hive.HiveExternalCatalog._ -import org.apache.spark.sql.hudi.{HoodieOptionConfig, HoodieSqlCommonUtils} import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.isEnableHive +import org.apache.spark.sql.hudi.{HoodieOptionConfig, HoodieSqlCommonUtils} import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{Row, SparkSession} @@ -167,10 +166,6 @@ object CreateHoodieTableCommand { if (!dbExists) { throw new NoSuchDatabaseException(dbName) } - // check table exists - if (sparkSession.sessionState.catalog.tableExists(table.identifier)) 
{ - throw new TableAlreadyExistsException(dbName, table.identifier.table) - } // append some table properties need for spark data source table. val dataSourceProps = tableMetaToTableProps(sparkSession.sparkContext.conf, table, table.schema) @@ -179,7 +174,7 @@ object CreateHoodieTableCommand { val client = HiveClientUtils.newClientForMetadata(sparkSession.sparkContext.conf, sparkSession.sessionState.newHadoopConf()) // create hive table. - client.createTable(tableWithDataSourceProps, ignoreIfExists) + client.createTable(tableWithDataSourceProps, ignoreIfExists = true) } // This code is forked from org.apache.spark.sql.hive.HiveExternalCatalog#tableMetaToTableProps diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala similarity index 100% rename from hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala rename to hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala index 4d2debbe934ea..04936978ed1de 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala @@ -17,42 +17,107 @@ package org.apache.spark.sql.hudi.command +import org.apache.hadoop.fs.Path +import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.table.HoodieTableMetaClient - -import org.apache.spark.sql.{Row, SparkSession} 
import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable -import org.apache.spark.sql.execution.command.TruncateTableCommand +import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTableType, HoodieCatalogTable} +import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getPartitionPathToDrop, normalizePartitionSpec} +import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import scala.util.control.NonFatal /** * Command for truncate hudi table. */ -class TruncateHoodieTableCommand( +case class TruncateHoodieTableCommand( tableIdentifier: TableIdentifier, partitionSpec: Option[TablePartitionSpec]) - extends TruncateTableCommand(tableIdentifier, partitionSpec) { + extends HoodieLeafRunnableCommand { + + override def run(spark: SparkSession): Seq[Row] = { + val fullTableName = s"${tableIdentifier.database}.${tableIdentifier.table}" + logInfo(s"start execute truncate table command for $fullTableName") - override def run(sparkSession: SparkSession): Seq[Row] = { - val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableIdentifier) + val hoodieCatalogTable = HoodieCatalogTable(spark, tableIdentifier) val properties = hoodieCatalogTable.tableConfig.getProps try { // Delete all data in the table directory - super.run(sparkSession) + val catalog = spark.sessionState.catalog + val table = catalog.getTableMetadata(tableIdentifier) + val tableIdentWithDB = table.identifier.quotedString + + if (table.tableType == CatalogTableType.VIEW) { + throw new AnalysisException( + s"Operation not allowed: TRUNCATE TABLE on views: $tableIdentWithDB") + } + + if (table.partitionColumnNames.isEmpty && partitionSpec.isDefined) { + throw new AnalysisException( + s"Operation not allowed: TRUNCATE TABLE ... 
PARTITION is not supported " + + s"for tables that are not partitioned: $tableIdentWithDB") + } + + val basePath = hoodieCatalogTable.tableLocation + val partCols = table.partitionColumnNames + val locations = if (partitionSpec.isEmpty || partCols.isEmpty) { + Seq(basePath) + } else { + val normalizedSpec: Seq[Map[String, String]] = Seq(partitionSpec.map { spec => + normalizePartitionSpec( + spec, + partCols, + table.identifier.quotedString, + spark.sessionState.conf.resolver) + }.get) + + val fullPartitionPath = FSUtils.getPartitionPath(basePath, getPartitionPathToDrop(hoodieCatalogTable, normalizedSpec)) + + Seq(fullPartitionPath) + } + + val hadoopConf = spark.sessionState.newHadoopConf() + locations.foreach { location => + val path = new Path(location.toString) + try { + val fs = path.getFileSystem(hadoopConf) + fs.delete(path, true) + fs.mkdirs(path) + } catch { + case NonFatal(e) => + throw new AnalysisException( + s"Failed to truncate table $tableIdentWithDB when removing data of the path: $path " + + s"because of ${e.toString}") + } + } + + // Also try to drop the contents of the table from the columnar cache + try { + spark.sharedState.cacheManager.uncacheQuery(spark.table(table.identifier), cascade = true) + } catch { + case NonFatal(_) => + } + + if (table.stats.nonEmpty) { + // empty table after truncation + val newStats = CatalogStatistics(sizeInBytes = 0, rowCount = Some(0)) + catalog.alterTableStats(tableIdentifier, Some(newStats)) + } + Seq.empty[Row] } catch { // TruncateTableCommand will delete the related directories first, and then refresh the table. // It will fail when refresh table, because the hudi meta directory(.hoodie) has been deleted at the first step. // So here ignore this failure, and refresh table later. 
- case NonFatal(_) => + case NonFatal(e) => + throw new AnalysisException(s"Exception when attempting to truncate table ${tableIdentifier.quotedString}: " + e) } // If we have not specified the partition, truncate will delete all the data in the table path // include the hoodie.properties. In this case we should reInit the table. if (partitionSpec.isEmpty) { - val hadoopConf = sparkSession.sessionState.newHadoopConf() + val hadoopConf = spark.sessionState.newHadoopConf() // ReInit hoodie.properties HoodieTableMetaClient.withPropertyBuilder() .fromProperties(properties) @@ -61,7 +126,7 @@ class TruncateHoodieTableCommand( // After deleting the data, refresh the table to make sure we don't keep around a stale // file relation in the metastore cache and cached table data in the cache manager. - sparkSession.catalog.refreshTable(hoodieCatalogTable.table.identifier.quotedString) + spark.catalog.refreshTable(hoodieCatalogTable.table.identifier.quotedString) Seq.empty[Row] } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala index 4e46233c3596e..5a2b30fae11e5 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala @@ -154,6 +154,7 @@ class HoodieStreamSource( } else { // Consume the data between (startCommitTime, endCommitTime] val incParams = parameters ++ Map( + DataSourceReadOptions.QUERY_TYPE.key -> DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL, DataSourceReadOptions.BEGIN_INSTANTTIME.key -> startCommitTime(startOffset), DataSourceReadOptions.END_INSTANTTIME.key -> endOffset.commitTime ) diff --git a/hudi-spark-datasource/hudi-spark/pom.xml 
b/hudi-spark-datasource/hudi-spark/pom.xml index 606f6fa894d72..1b83cf5eca662 100644 --- a/hudi-spark-datasource/hudi-spark/pom.xml +++ b/hudi-spark-datasource/hudi-spark/pom.xml @@ -19,12 +19,12 @@ hudi-spark-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-spark_${scala.binary.version} - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-spark_${scala.binary.version} jar @@ -329,13 +329,6 @@ test - - - org.apache.spark - spark-avro_${scala.binary.version} - provided - - org.apache.hadoop diff --git a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java index 9aa7ac1a664cd..56ad5a8b66c82 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java +++ b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java @@ -246,4 +246,4 @@ public static Map getQuickstartWriteConfigs() { demoConfigs.put("hoodie.delete.shuffle.parallelism", "2"); return demoConfigs; } -} \ No newline at end of file +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSparkSessionExtension.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSparkSessionExtension.scala index 9cc95e6f9e90d..fff44bb7f570b 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSparkSessionExtension.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSparkSessionExtension.scala @@ -39,6 +39,10 @@ class HoodieSparkSessionExtension extends (SparkSessionExtensions => Unit) } } + extensions.injectResolutionRule { session => + sparkAdapter.createResolveHudiAlterTableCommand(session) + } + HoodieAnalysis.customPostHocResolutionRules().foreach { rule => extensions.injectPostHocResolutionRule { session => rule(session) diff --git 
a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala index 7bd9a3f229b29..5b513f7500c10 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala @@ -17,97 +17,37 @@ package org.apache.spark.sql.hudi.command -import org.apache.hudi.HoodieCLIUtils -import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieTableType} +import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.table.HoodieTableMetaClient -import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieTimeline} -import org.apache.hudi.common.util.{HoodieTimer, Option => HOption} -import org.apache.hudi.exception.HoodieException + import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} -import org.apache.spark.sql.catalyst.plans.logical.CompactionOperation import org.apache.spark.sql.catalyst.plans.logical.CompactionOperation.{CompactionOperation, RUN, SCHEDULE} +import org.apache.spark.sql.hudi.command.procedures.{HoodieProcedureUtils, RunCompactionProcedure} import org.apache.spark.sql.types.StringType import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.unsafe.types.UTF8String -import scala.collection.JavaConversions._ -import scala.collection.JavaConverters._ - +@Deprecated case class CompactionHoodiePathCommand(path: String, - operation: CompactionOperation, instantTimestamp: Option[Long] = None) + operation: CompactionOperation, + instantTimestamp: Option[Long] = None) extends HoodieLeafRunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val metaClient = HoodieTableMetaClient.builder().setBasePath(path) 
.setConf(sparkSession.sessionState.newHadoopConf()).build() + assert(metaClient.getTableType == HoodieTableType.MERGE_ON_READ, s"Must compaction on a Merge On Read table.") - assert(metaClient.getTableType == HoodieTableType.MERGE_ON_READ, - s"Must compaction on a Merge On Read table.") - val client = HoodieCLIUtils.createHoodieClientFromPath(sparkSession, path, Map.empty) - - operation match { - case SCHEDULE => - val instantTime = instantTimestamp.map(_.toString).getOrElse(HoodieActiveTimeline.createNewInstantTime) - if (client.scheduleCompactionAtInstant(instantTime, HOption.empty[java.util.Map[String, String]])) { - Seq(Row(instantTime)) - } else { - Seq.empty[Row] - } - case RUN => - // Do compaction - val timeLine = metaClient.getActiveTimeline - val pendingCompactionInstants = timeLine.getWriteTimeline.getInstants.iterator().asScala - .filter(p => p.getAction == HoodieTimeline.COMPACTION_ACTION) - .map(_.getTimestamp) - .toSeq.sortBy(f => f) - val willCompactionInstants = if (instantTimestamp.isEmpty) { - if (pendingCompactionInstants.nonEmpty) { - pendingCompactionInstants - } else { // If there are no pending compaction, schedule to generate one. - // CompactionHoodiePathCommand will return instanceTime for SCHEDULE. 
- val scheduleSeq = CompactionHoodiePathCommand(path, CompactionOperation.SCHEDULE).run(sparkSession) - if (scheduleSeq.isEmpty) { - Seq.empty - } else { - Seq(scheduleSeq.take(1).get(0).getString(0)).filter(_ != null) - } - } - } else { - // Check if the compaction timestamp has exists in the pending compaction - if (pendingCompactionInstants.contains(instantTimestamp.get.toString)) { - Seq(instantTimestamp.get.toString) - } else { - throw new IllegalArgumentException(s"Compaction instant: ${instantTimestamp.get} is not found in $path," + - s" Available pending compaction instants are: ${pendingCompactionInstants.mkString(",")} ") - } - } - if (willCompactionInstants.isEmpty) { - logInfo(s"No need to compaction on $path") - Seq.empty[Row] - } else { - logInfo(s"Run compaction at instants: [${willCompactionInstants.mkString(",")}] on $path") - val timer = new HoodieTimer - timer.startTimer() - willCompactionInstants.foreach {compactionInstant => - val writeResponse = client.compact(compactionInstant) - handleResponse(writeResponse.getCommitMetadata.get()) - client.commitCompaction(compactionInstant, writeResponse.getCommitMetadata.get(), HOption.empty()) - } - logInfo(s"Finish Run compaction at instants: [${willCompactionInstants.mkString(",")}]," + - s" spend: ${timer.endTimer()}ms") - Seq.empty[Row] - } - case _=> throw new UnsupportedOperationException(s"Unsupported compaction operation: $operation") + val op = operation match { + case SCHEDULE => UTF8String.fromString("schedule") + case RUN => UTF8String.fromString("run") + case _ => throw new UnsupportedOperationException(s"Unsupported compaction operation: $operation") } - } - - private def handleResponse(metadata: HoodieCommitMetadata): Unit = { - // Handle error - val writeStats = metadata.getPartitionToWriteStats.entrySet().flatMap(e => e.getValue).toList - val errorsCount = writeStats.map(state => state.getTotalWriteErrors).sum - if (errorsCount > 0) { - throw new HoodieException(s" Found $errorsCount 
when writing record") - } + var args: Map[String, Any] = Map("op" -> op, "path" -> UTF8String.fromString(path)) + instantTimestamp.foreach(timestamp => args += "timestamp" -> timestamp) + val procedureArgs = HoodieProcedureUtils.buildProcedureArgs(args) + RunCompactionProcedure.builder.get().build.call(procedureArgs) } override val output: Seq[Attribute] = { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodieTableCommand.scala index 2c89ed8c9d203..5e362314c2df7 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodieTableCommand.scala @@ -24,8 +24,10 @@ import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.getTableLocation import org.apache.spark.sql.types.StringType import org.apache.spark.sql.{Row, SparkSession} +@Deprecated case class CompactionHoodieTableCommand(table: CatalogTable, - operation: CompactionOperation, instantTimestamp: Option[Long]) + operation: CompactionOperation, + instantTimestamp: Option[Long]) extends HoodieLeafRunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodiePathCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodiePathCommand.scala index 44c57239703f3..965724163b96c 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodiePathCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodiePathCommand.scala @@ -19,41 +19,32 @@ package org.apache.spark.sql.hudi.command 
import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.table.HoodieTableMetaClient -import org.apache.hudi.common.table.timeline.HoodieTimeline -import org.apache.hudi.common.util.CompactionUtils + import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.hudi.command.procedures.{HoodieProcedureUtils, ShowCompactionProcedure} import org.apache.spark.sql.types.{IntegerType, StringType} +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.unsafe.types.UTF8String -import scala.collection.JavaConverters.asScalaIteratorConverter - +@Deprecated case class CompactionShowHoodiePathCommand(path: String, limit: Int) extends HoodieLeafRunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { - val metaClient = HoodieTableMetaClient.builder().setBasePath(path.toString) + val metaClient = HoodieTableMetaClient.builder().setBasePath(path) .setConf(sparkSession.sessionState.newHadoopConf()).build() assert(metaClient.getTableType == HoodieTableType.MERGE_ON_READ, s"Cannot show compaction on a Non Merge On Read table.") - val timeLine = metaClient.getActiveTimeline - val compactionInstants = timeLine.getInstants.iterator().asScala - .filter(p => p.getAction == HoodieTimeline.COMPACTION_ACTION) - .toSeq - .sortBy(f => f.getTimestamp) - .reverse - .take(limit) - val compactionPlans = compactionInstants.map(instant => - (instant, CompactionUtils.getCompactionPlan(metaClient, instant.getTimestamp))) - compactionPlans.map { case (instant, plan) => - Row(instant.getTimestamp, instant.getAction, plan.getOperations.size()) - } + + val args = Map("path" -> UTF8String.fromString(path), "limit" -> limit) + val procedureArgs = HoodieProcedureUtils.buildProcedureArgs(args) + ShowCompactionProcedure.builder.get().build.call(procedureArgs) } override val output: 
Seq[Attribute] = { Seq( - AttributeReference("timestamp", StringType, nullable = false)(), + AttributeReference("instant", StringType, nullable = false)(), AttributeReference("action", StringType, nullable = false)(), AttributeReference("size", IntegerType, nullable = false)() ) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodieTableCommand.scala index a9176164f4c6c..f3f0a8e529be9 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodieTableCommand.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.getTableLocation import org.apache.spark.sql.types.{IntegerType, StringType} import org.apache.spark.sql.{Row, SparkSession} +@Deprecated case class CompactionShowHoodieTableCommand(table: CatalogTable, limit: Int) extends HoodieLeafRunnableCommand { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DeleteHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DeleteHoodieTableCommand.scala index f6da1b3868302..632a983b48960 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DeleteHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DeleteHoodieTableCommand.scala @@ -17,20 +17,15 @@ package org.apache.spark.sql.hudi.command -import org.apache.hudi.DataSourceWriteOptions.{OPERATION, _} -import org.apache.hudi.config.HoodieWriteConfig -import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME -import org.apache.hudi.hive.HiveSyncConfig -import org.apache.hudi.hive.ddl.HiveSyncMode 
-import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport} +import org.apache.hudi.SparkAdapterSupport import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable import org.apache.spark.sql.catalyst.plans.logical.DeleteFromTable import org.apache.spark.sql.hudi.HoodieSqlCommonUtils._ -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.hudi.ProvidesHoodieConfig case class DeleteHoodieTableCommand(deleteTable: DeleteFromTable) extends HoodieLeafRunnableCommand - with SparkAdapterSupport { + with SparkAdapterSupport with ProvidesHoodieConfig { private val table = deleteTable.table @@ -44,7 +39,9 @@ case class DeleteHoodieTableCommand(deleteTable: DeleteFromTable) extends Hoodie if (deleteTable.condition.isDefined) { df = df.filter(Column(deleteTable.condition.get)) } - val config = buildHoodieConfig(sparkSession) + + val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableId) + val config = buildHoodieDeleteTableConfig(hoodieCatalogTable, sparkSession) df.write .format("hudi") .mode(SaveMode.Append) @@ -54,33 +51,4 @@ case class DeleteHoodieTableCommand(deleteTable: DeleteFromTable) extends Hoodie logInfo(s"Finish execute delete command for $tableId") Seq.empty[Row] } - - private def buildHoodieConfig(sparkSession: SparkSession): Map[String, String] = { - val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableId) - val path = hoodieCatalogTable.tableLocation - val tableConfig = hoodieCatalogTable.tableConfig - val tableSchema = hoodieCatalogTable.tableSchema - val partitionColumns = tableConfig.getPartitionFieldProp.split(",").map(_.toLowerCase) - val partitionSchema = StructType(tableSchema.filter(f => partitionColumns.contains(f.name))) - - assert(hoodieCatalogTable.primaryKeys.nonEmpty, - s"There are no primary key defined in table $tableId, cannot execute delete operator") - withSparkConf(sparkSession, hoodieCatalogTable.catalogProperties) { - Map( - "path" -> path, - 
RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","), - TBL_NAME.key -> tableConfig.getTableName, - HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable, - URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitioning, - KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName, - SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName, - OPERATION.key -> DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL, - PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp, - HiveSyncConfig.HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(), - HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true", - HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> "200", - SqlKeyGenerator.PARTITION_SCHEMA -> partitionSchema.toDDL - ) - } - } } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala index 74d6226b49f6d..1376445bda966 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala @@ -22,8 +22,7 @@ import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.common.util.StringUtils import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME -import org.apache.hudi.hive.{HiveSyncConfig, MultiPartKeysValueExtractor} -import org.apache.hudi.hive.ddl.HiveSyncMode +import org.apache.hudi.hive.HiveSyncConfig import org.apache.hudi.sync.common.HoodieSyncConfig import org.apache.hudi.{AvroConversionUtils, DataSourceWriteOptions, HoodieSparkSqlWriter, SparkAdapterSupport} import org.apache.spark.sql._ @@ -34,9 +33,9 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, 
Attribute, AttributeRef import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.hudi.HoodieSqlCommonUtils._ import org.apache.spark.sql.hudi.HoodieSqlUtils.getMergeIntoTargetTableId -import org.apache.spark.sql.hudi.SerDeUtils import org.apache.spark.sql.hudi.command.payload.ExpressionPayload import org.apache.spark.sql.hudi.command.payload.ExpressionPayload._ +import org.apache.spark.sql.hudi.{ProvidesHoodieConfig, SerDeUtils} import org.apache.spark.sql.types.{BooleanType, StructType} import java.util.Base64 @@ -61,7 +60,7 @@ import java.util.Base64 * */ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends HoodieLeafRunnableCommand - with SparkAdapterSupport { + with SparkAdapterSupport with ProvidesHoodieConfig { private var sparkSession: SparkSession = _ @@ -439,6 +438,7 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Hoodie val targetTableDb = targetTableIdentify.database.getOrElse("default") val targetTableName = targetTableIdentify.identifier val path = hoodieCatalogTable.tableLocation + val catalogProperties = hoodieCatalogTable.catalogProperties val tableConfig = hoodieCatalogTable.tableConfig val tableSchema = hoodieCatalogTable.tableSchema val partitionColumns = tableConfig.getPartitionFieldProp.split(",").map(_.toLowerCase) @@ -449,6 +449,9 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Hoodie // TODO(HUDI-3456) clean up val preCombineField = hoodieCatalogTable.preCombineKey.getOrElse("") + val hoodieProps = getHoodieProps(catalogProperties, tableConfig, sparkSession.sqlContext.conf) + val hiveSyncConfig = buildHiveSyncConfig(hoodieProps, hoodieCatalogTable) + // Enable the hive sync by default if spark have enable the hive metastore. 
val enableHive = isEnableHive(sparkSession) withSparkConf(sparkSession, hoodieCatalogTable.catalogProperties) { @@ -464,16 +467,15 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Hoodie KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName, SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName, HoodieSyncConfig.META_SYNC_ENABLED.key -> enableHive.toString, - HiveSyncConfig.HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(), - HiveSyncConfig.HIVE_USE_JDBC.key -> "false", + HiveSyncConfig.HIVE_SYNC_MODE.key -> hiveSyncConfig.syncMode, HoodieSyncConfig.META_SYNC_DATABASE_NAME.key -> targetTableDb, HoodieSyncConfig.META_SYNC_TABLE_NAME.key -> targetTableName, - HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true", + HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> hiveSyncConfig.supportTimestamp.toString, HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key -> tableConfig.getPartitionFieldProp, - HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, - HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key -> "200", // set the default parallelism to 200 for sql - HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200", - HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> "200", + HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key -> hiveSyncConfig.partitionValueExtractorClass, + HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key, "200"), // set the default parallelism to 200 for sql + HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "200"), + HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key, "200"), SqlKeyGenerator.PARTITION_SCHEMA -> partitionSchema.toDDL ) .filter { case (_, v) => v != null } diff --git 
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionCodeGen.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionCodeGen.scala similarity index 100% rename from hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionCodeGen.scala rename to hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionCodeGen.scala diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala similarity index 100% rename from hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala rename to hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedureUtils.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedureUtils.scala new file mode 100644 index 0000000000000..374f86773d1cb --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedureUtils.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow + +import java.util + +object HoodieProcedureUtils { + + /** + * Build named procedure arguments from given args' map + * + * @param args The arguments map + * @return Named procedure arguments + */ + def buildProcedureArgs(args: Map[String, Any]): ProcedureArgs = { + val values: Array[Any] = new Array[Any](args.size) + val map = new util.LinkedHashMap[String, Int]() + + args.zipWithIndex.foreach { + case ((key, value), index) => + values(index) = value + map.put(key, index) + } + + ProcedureArgs(isNamedArgs = true, map, new GenericInternalRow(values)) + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala index 9c05773531322..e7de3e784a2fd 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala @@ -33,6 +33,8 @@ object HoodieProcedures { private def initProcedureBuilders: util.Map[String, Supplier[ProcedureBuilder]] = { val mapBuilder: ImmutableMap.Builder[String, Supplier[ProcedureBuilder]] = ImmutableMap.builder() + mapBuilder.put(RunCompactionProcedure.NAME, RunCompactionProcedure.builder) + 
mapBuilder.put(ShowCompactionProcedure.NAME, ShowCompactionProcedure.builder) mapBuilder.put(CreateSavepointsProcedure.NAME, CreateSavepointsProcedure.builder) mapBuilder.put(DeleteSavepointsProcedure.NAME, DeleteSavepointsProcedure.builder) mapBuilder.put(RollbackSavepointsProcedure.NAME, RollbackSavepointsProcedure.builder) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunClusteringProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunClusteringProcedure.scala index 442ee04415c84..231d0939cc2e7 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunClusteringProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunClusteringProcedure.scala @@ -24,9 +24,9 @@ import org.apache.hudi.common.util.ValidationUtils.checkArgument import org.apache.hudi.common.util.{ClusteringUtils, Option => HOption} import org.apache.hudi.config.HoodieClusteringConfig import org.apache.hudi.exception.HoodieClusteringException -import org.apache.hudi.{AvroConversionUtils, HoodieCLIUtils, HoodieFileIndex} +import org.apache.hudi.{AvroConversionUtils, HoodieCLIUtils, HoodieFileIndex, SparkAdapterSupport} import org.apache.spark.internal.Logging -import org.apache.spark.sql.{HoodieCatalystExpressionUtils, Row} +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.PredicateHelper import org.apache.spark.sql.execution.datasources.FileStatusCache import org.apache.spark.sql.types._ @@ -34,7 +34,14 @@ import org.apache.spark.sql.types._ import java.util.function.Supplier import scala.collection.JavaConverters._ -class RunClusteringProcedure extends BaseProcedure with ProcedureBuilder with PredicateHelper with Logging { +class RunClusteringProcedure extends BaseProcedure + with ProcedureBuilder + with PredicateHelper + with Logging + with 
SparkAdapterSupport { + + private val exprUtils = sparkAdapter.createCatalystExpressionUtils() + /** * OPTIMIZE table_name|table_path [WHERE predicate] * [ORDER BY (col_name1 [, ...] ) ] @@ -120,9 +127,9 @@ class RunClusteringProcedure extends BaseProcedure with ProcedureBuilder with Pr // Resolve partition predicates val schemaResolver = new TableSchemaResolver(metaClient) val tableSchema = AvroConversionUtils.convertAvroSchemaToStructType(schemaResolver.getTableAvroSchema) - val condition = HoodieCatalystExpressionUtils.resolveFilterExpr(sparkSession, predicate, tableSchema) + val condition = exprUtils.resolveExpr(sparkSession, predicate, tableSchema) val partitionColumns = metaClient.getTableConfig.getPartitionFields.orElse(Array[String]()) - val (partitionPredicates, dataPredicates) = HoodieCatalystExpressionUtils.splitPartitionAndDataPredicates( + val (partitionPredicates, dataPredicates) = exprUtils.splitPartitionAndDataPredicates( sparkSession, splitConjunctivePredicates(condition).toArray, partitionColumns) checkArgument(dataPredicates.isEmpty, "Only partition predicates are allowed") diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunCompactionProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunCompactionProcedure.scala new file mode 100644 index 0000000000000..9bca33f3882d4 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunCompactionProcedure.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.hudi.common.model.HoodieCommitMetadata +import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieTimeline} +import org.apache.hudi.common.util.{HoodieTimer, Option => HOption} +import org.apache.hudi.exception.HoodieException +import org.apache.hudi.{HoodieCLIUtils, SparkAdapterSupport} + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.Row +import org.apache.spark.sql.types._ + +import java.util.function.Supplier + +import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ + +class RunCompactionProcedure extends BaseProcedure with ProcedureBuilder with SparkAdapterSupport with Logging { + + /** + * operation = (RUN | SCHEDULE) COMPACTION ON tableIdentifier (AT instantTimestamp = INTEGER_VALUE)? + * operation = (RUN | SCHEDULE) COMPACTION ON path = STRING (AT instantTimestamp = INTEGER_VALUE)? 
+ */ + private val PARAMETERS = Array[ProcedureParameter]( + ProcedureParameter.required(0, "op", DataTypes.StringType, None), + ProcedureParameter.optional(1, "table", DataTypes.StringType, None), + ProcedureParameter.optional(2, "path", DataTypes.StringType, None), + ProcedureParameter.optional(3, "timestamp", DataTypes.LongType, None) + ) + + private val OUTPUT_TYPE = new StructType(Array[StructField]( + StructField("instant", DataTypes.StringType, nullable = true, Metadata.empty) + )) + + def parameters: Array[ProcedureParameter] = PARAMETERS + + def outputType: StructType = OUTPUT_TYPE + + override def call(args: ProcedureArgs): Seq[Row] = { + super.checkArgs(PARAMETERS, args) + + val operation = getArgValueOrDefault(args, PARAMETERS(0)).get.asInstanceOf[String].toLowerCase + val tableName = getArgValueOrDefault(args, PARAMETERS(1)) + val tablePath = getArgValueOrDefault(args, PARAMETERS(2)) + val instantTimestamp = getArgValueOrDefault(args, PARAMETERS(3)) + + val basePath = getBasePath(tableName, tablePath) + val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build + val client = HoodieCLIUtils.createHoodieClientFromPath(sparkSession, basePath, Map.empty) + + operation match { + case "schedule" => + val instantTime = instantTimestamp.map(_.toString).getOrElse(HoodieActiveTimeline.createNewInstantTime) + if (client.scheduleCompactionAtInstant(instantTime, HOption.empty[java.util.Map[String, String]])) { + Seq(Row(instantTime)) + } else { + Seq.empty[Row] + } + case "run" => + // Do compaction + val timeLine = metaClient.getActiveTimeline + val pendingCompactionInstants = timeLine.getWriteTimeline.getInstants.iterator().asScala + .filter(p => p.getAction == HoodieTimeline.COMPACTION_ACTION) + .map(_.getTimestamp) + .toSeq.sortBy(f => f) + val willCompactionInstants = if (instantTimestamp.isEmpty) { + if (pendingCompactionInstants.nonEmpty) { + pendingCompactionInstants + } else { // If there are no pending 
compaction, schedule to generate one. + // CompactionHoodiePathCommand will return instanceTime for SCHEDULE. + val instantTime = HoodieActiveTimeline.createNewInstantTime() + if (client.scheduleCompactionAtInstant(instantTime, HOption.empty[java.util.Map[String, String]])) { + Seq(instantTime) + } else { + Seq.empty + } + } + } else { + // Check if the compaction timestamp has exists in the pending compaction + if (pendingCompactionInstants.contains(instantTimestamp.get.toString)) { + Seq(instantTimestamp.get.toString) + } else { + throw new IllegalArgumentException(s"Compaction instant: ${instantTimestamp.get} is not found in " + + s"$basePath, Available pending compaction instants are: ${pendingCompactionInstants.mkString(",")} ") + } + } + if (willCompactionInstants.isEmpty) { + logInfo(s"No need to compaction on $basePath") + Seq.empty[Row] + } else { + logInfo(s"Run compaction at instants: [${willCompactionInstants.mkString(",")}] on $basePath") + val timer = new HoodieTimer + timer.startTimer() + willCompactionInstants.foreach { compactionInstant => + val writeResponse = client.compact(compactionInstant) + handleResponse(writeResponse.getCommitMetadata.get()) + client.commitCompaction(compactionInstant, writeResponse.getCommitMetadata.get(), HOption.empty()) + } + logInfo(s"Finish Run compaction at instants: [${willCompactionInstants.mkString(",")}]," + + s" spend: ${timer.endTimer()}ms") + Seq.empty[Row] + } + case _ => throw new UnsupportedOperationException(s"Unsupported compaction operation: $operation") + } + } + + private def handleResponse(metadata: HoodieCommitMetadata): Unit = { + // Handle error + val writeStats = metadata.getPartitionToWriteStats.entrySet().flatMap(e => e.getValue).toList + val errorsCount = writeStats.map(state => state.getTotalWriteErrors).sum + if (errorsCount > 0) { + throw new HoodieException(s" Found $errorsCount when writing record") + } + } + + override def build: Procedure = new RunCompactionProcedure() + +} + +object 
RunCompactionProcedure { + val NAME = "run_compaction" + + def builder: Supplier[ProcedureBuilder] = new Supplier[ProcedureBuilder] { + override def get() = new RunCompactionProcedure + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCompactionProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCompactionProcedure.scala new file mode 100644 index 0000000000000..d484d65323447 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCompactionProcedure.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.hudi.SparkAdapterSupport +import org.apache.hudi.common.model.HoodieTableType +import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.hudi.common.table.timeline.HoodieTimeline +import org.apache.hudi.common.util.CompactionUtils + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.Row +import org.apache.spark.sql.types._ + +import java.util.function.Supplier + +import scala.collection.JavaConverters._ + +class ShowCompactionProcedure extends BaseProcedure with ProcedureBuilder with SparkAdapterSupport with Logging { + /** + * SHOW COMPACTION ON tableIdentifier (LIMIT limit = INTEGER_VALUE)? + * SHOW COMPACTION ON path = STRING (LIMIT limit = INTEGER_VALUE)? + */ + private val PARAMETERS = Array[ProcedureParameter]( + ProcedureParameter.optional(0, "table", DataTypes.StringType, None), + ProcedureParameter.optional(1, "path", DataTypes.StringType, None), + ProcedureParameter.optional(2, "limit", DataTypes.IntegerType, 20) + ) + + private val OUTPUT_TYPE = new StructType(Array[StructField]( + StructField("timestamp", DataTypes.StringType, nullable = true, Metadata.empty), + StructField("action", DataTypes.StringType, nullable = true, Metadata.empty), + StructField("size", DataTypes.IntegerType, nullable = true, Metadata.empty) + )) + + def parameters: Array[ProcedureParameter] = PARAMETERS + + def outputType: StructType = OUTPUT_TYPE + + override def call(args: ProcedureArgs): Seq[Row] = { + super.checkArgs(PARAMETERS, args) + + val tableName = getArgValueOrDefault(args, PARAMETERS(0)) + val tablePath = getArgValueOrDefault(args, PARAMETERS(1)) + val limit = getArgValueOrDefault(args, PARAMETERS(2)).get.asInstanceOf[Int] + + val basePath: String = getBasePath(tableName, tablePath) + val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build + + assert(metaClient.getTableType == 
HoodieTableType.MERGE_ON_READ, + s"Cannot show compaction on a Non Merge On Read table.") + val timeLine = metaClient.getActiveTimeline + val compactionInstants = timeLine.getInstants.iterator().asScala + .filter(p => p.getAction == HoodieTimeline.COMPACTION_ACTION) + .toSeq + .sortBy(f => f.getTimestamp) + .reverse + .take(limit) + val compactionPlans = compactionInstants.map(instant => + (instant, CompactionUtils.getCompactionPlan(metaClient, instant.getTimestamp))) + compactionPlans.map { case (instant, plan) => + Row(instant.getTimestamp, instant.getAction, plan.getOperations.size()) + } + } + + override def build: Procedure = new ShowCompactionProcedure() +} + +object ShowCompactionProcedure { + val NAME = "show_compaction" + + def builder: Supplier[ProcedureBuilder] = new Supplier[ProcedureBuilder] { + override def get() = new ShowCompactionProcedure + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/ColumnStatsIndexHelper.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/ColumnStatsIndexHelper.java new file mode 100644 index 0000000000000..7c9649d44992f --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/ColumnStatsIndexHelper.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi; + +import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.model.HoodieColumnRangeMetadata; +import org.apache.hudi.common.model.HoodieFileFormat; +import org.apache.hudi.common.util.BaseFileUtils; +import org.apache.hudi.common.util.ParquetUtils; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.spark.SparkContext; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.Row$; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.types.BinaryType; +import org.apache.spark.sql.types.BooleanType; +import org.apache.spark.sql.types.ByteType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DateType; +import org.apache.spark.sql.types.DecimalType; +import org.apache.spark.sql.types.DoubleType; +import org.apache.spark.sql.types.FloatType; +import org.apache.spark.sql.types.IntegerType; +import org.apache.spark.sql.types.LongType; +import org.apache.spark.sql.types.ShortType; +import org.apache.spark.sql.types.StringType; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.types.StructType$; +import org.apache.spark.sql.types.TimestampType; +import org.apache.spark.util.SerializableConfiguration; +import scala.collection.JavaConversions; +import scala.collection.JavaConverters$; + +import javax.annotation.Nonnull; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +// TODO merge w/ ColumnStatsIndexSupport 
+public class ColumnStatsIndexHelper { + + public static Pair + fetchMinMaxValues( + @Nonnull DataType colType, + @Nonnull HoodieColumnRangeMetadata colMetadata) { + if (colType instanceof IntegerType) { + return Pair.of( + new Integer(colMetadata.getMinValue().toString()), + new Integer(colMetadata.getMaxValue().toString()) + ); + } else if (colType instanceof DoubleType) { + return Pair.of( + new Double(colMetadata.getMinValue().toString()), + new Double(colMetadata.getMaxValue().toString()) + ); + } else if (colType instanceof StringType) { + return Pair.of( + colMetadata.getMinValue().toString(), + colMetadata.getMaxValue().toString()); + } else if (colType instanceof DecimalType) { + return Pair.of( + new BigDecimal(colMetadata.getMinValue().toString()), + new BigDecimal(colMetadata.getMaxValue().toString())); + } else if (colType instanceof DateType) { + return Pair.of( + java.sql.Date.valueOf(colMetadata.getMinValue().toString()), + java.sql.Date.valueOf(colMetadata.getMaxValue().toString())); + } else if (colType instanceof LongType) { + return Pair.of( + new Long(colMetadata.getMinValue().toString()), + new Long(colMetadata.getMaxValue().toString())); + } else if (colType instanceof ShortType) { + return Pair.of( + new Short(colMetadata.getMinValue().toString()), + new Short(colMetadata.getMaxValue().toString())); + } else if (colType instanceof FloatType) { + return Pair.of( + new Float(colMetadata.getMinValue().toString()), + new Float(colMetadata.getMaxValue().toString())); + } else if (colType instanceof BinaryType) { + return Pair.of( + ((ByteBuffer) colMetadata.getMinValue()).array(), + ((ByteBuffer) colMetadata.getMaxValue()).array()); + } else if (colType instanceof BooleanType) { + return Pair.of( + Boolean.valueOf(colMetadata.getMinValue().toString()), + Boolean.valueOf(colMetadata.getMaxValue().toString())); + } else if (colType instanceof ByteType) { + return Pair.of( + Byte.valueOf(colMetadata.getMinValue().toString()), + 
Byte.valueOf(colMetadata.getMaxValue().toString())); + } else { + throw new HoodieException(String.format("Not support type: %s", colType)); + } + } + + /** + * NOTE: THIS IS ONLY USED IN TESTING CURRENTLY, SINCE DATA SKIPPING IS NOW RELYING ON + * METADATA TABLE INDEX + * + * Parse min/max statistics from Parquet footers for provided columns and composes column-stats + * index table in the following format with 3 statistics denominated for each + * linear/Z-curve/Hilbert-curve-ordered column. For ex, if original table contained + * column {@code A}: + * + *
    +   * +---------------------------+------------+------------+-------------+
    +   * |          file             | A_minValue | A_maxValue | A_nullCount |
    +   * +---------------------------+------------+------------+-------------+
    +   * | one_base_file.parquet     |          1 |         10 |           0 |
    +   * | another_base_file.parquet |        -10 |          0 |           5 |
    +   * +---------------------------+------------+------------+-------------+
    +   * 
    + *

    + * NOTE: Currently {@link TimestampType} is not supported, since Parquet writer + * does not support statistics for it. + * + * @VisibleForTestingOnly + * + * @param sparkSession encompassing Spark session + * @param baseFilesPaths list of base-files paths to be sourced for column-stats index + * @param orderedColumnSchemas target ordered columns + * @return Spark's {@link Dataset} holding an index table + * @VisibleForTesting + */ + @Nonnull + public static Dataset buildColumnStatsTableFor( + @Nonnull SparkSession sparkSession, + @Nonnull List baseFilesPaths, + @Nonnull List orderedColumnSchemas + ) { + SparkContext sc = sparkSession.sparkContext(); + JavaSparkContext jsc = new JavaSparkContext(sc); + + List columnNames = orderedColumnSchemas.stream() + .map(StructField::name) + .collect(Collectors.toList()); + + SerializableConfiguration serializableConfiguration = new SerializableConfiguration(sc.hadoopConfiguration()); + int numParallelism = (baseFilesPaths.size() / 3 + 1); + + String previousJobDescription = sc.getLocalProperty("spark.job.description"); + + List> colMinMaxInfos; + try { + jsc.setJobDescription("Listing parquet column statistics"); + colMinMaxInfos = + jsc.parallelize(baseFilesPaths, numParallelism) + .mapPartitions(paths -> { + ParquetUtils utils = (ParquetUtils) BaseFileUtils.getInstance(HoodieFileFormat.PARQUET); + Iterable iterable = () -> paths; + return StreamSupport.stream(iterable.spliterator(), false) + .flatMap(path -> + utils.readRangeFromParquetMetadata( + serializableConfiguration.value(), + new Path(path), + columnNames + ) + .stream() + ) + .iterator(); + }) + .collect(); + } finally { + jsc.setJobDescription(previousJobDescription); + } + + // Group column's metadata by file-paths of the files it belongs to + Map>> filePathToColumnMetadataMap = + colMinMaxInfos.stream() + .collect(Collectors.groupingBy(HoodieColumnRangeMetadata::getFilePath)); + + JavaRDD allMetaDataRDD = + jsc.parallelize(new 
ArrayList<>(filePathToColumnMetadataMap.values()), 1) + .map(fileColumnsMetadata -> { + int colSize = fileColumnsMetadata.size(); + if (colSize == 0) { + return null; + } + + String filePath = fileColumnsMetadata.get(0).getFilePath(); + + List indexRow = new ArrayList<>(); + + // First columns of the Z-index's row is target file-path + indexRow.add(filePath); + + // For each column + orderedColumnSchemas.forEach(colSchema -> { + String colName = colSchema.name(); + + HoodieColumnRangeMetadata colMetadata = + fileColumnsMetadata.stream() + .filter(s -> s.getColumnName().trim().equalsIgnoreCase(colName)) + .findFirst() + .orElse(null); + + DataType colType = colSchema.dataType(); + if (colMetadata == null || colType == null) { + throw new HoodieException(String.format("Cannot collect min/max statistics for column (%s)", colSchema)); + } + + Pair minMaxValue = fetchMinMaxValues(colType, colMetadata); + + indexRow.add(minMaxValue.getLeft()); // min + indexRow.add(minMaxValue.getRight()); // max + indexRow.add(colMetadata.getNullCount()); + }); + + return Row$.MODULE$.apply(JavaConversions.asScalaBuffer(indexRow)); + }) + .filter(Objects::nonNull); + + StructType indexSchema = ColumnStatsIndexSupport$.MODULE$.composeIndexSchema( + JavaConverters$.MODULE$.collectionAsScalaIterableConverter(columnNames).asScala().toSeq(), + StructType$.MODULE$.apply(orderedColumnSchemas) + ); + + return sparkSession.createDataFrame(allMetaDataRDD, indexSchema); + } +} diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestBucketIdentifier.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/index/bucket/TestBucketIdentifier.java similarity index 100% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestBucketIdentifier.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/index/bucket/TestBucketIdentifier.java diff --git 
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java similarity index 97% rename from hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java index a0d90e028af82..3bd6a60c4c1ea 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java @@ -28,7 +28,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -public class TestGlobalDeleteKeyGenerator extends KeyGeneratorTestUtilities { +public class TestGlobalDeleteRecordGenerator extends KeyGeneratorTestUtilities { private TypedProperties getCommonProps(boolean getComplexRecordKey) { TypedProperties properties = new TypedProperties(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java similarity index 100% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/column-stats-index-table.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/column-stats-index-table.json new file mode 100644 index 0000000000000..297e000de4dff --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/column-stats-index-table.json @@ -0,0 +1,4 @@ 
+{"c1_maxValue":769,"c1_minValue":309,"c1_nullCount":0,"c2_maxValue":" 769sdc","c2_minValue":" 309sdc","c2_nullCount":0,"c3_maxValue":919.769,"c3_minValue":76.430,"c3_nullCount":0,"c4_maxValue":"2021-11-19T20:40:55.543-08:00","c4_minValue":"2021-11-19T20:40:55.521-08:00","c4_nullCount":0,"c5_maxValue":78,"c5_minValue":32,"c5_nullCount":0,"c6_maxValue":"2020-11-14","c6_minValue":"2020-01-08","c6_nullCount":0,"c7_maxValue":"uQ==","c7_minValue":"AQ==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":9} +{"c1_maxValue":932,"c1_minValue":0,"c1_nullCount":0,"c2_maxValue":" 932sdc","c2_minValue":" 0sdc","c2_nullCount":0,"c3_maxValue":994.355,"c3_minValue":19.000,"c3_nullCount":0,"c4_maxValue":"2021-11-19T20:40:55.549-08:00","c4_minValue":"2021-11-19T20:40:55.339-08:00","c4_nullCount":0,"c5_maxValue":94,"c5_minValue":1,"c5_nullCount":0,"c6_maxValue":"2020-09-09","c6_minValue":"2020-01-01","c6_nullCount":0,"c7_maxValue":"xw==","c7_minValue":"AA==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":8} +{"c1_maxValue":943,"c1_minValue":89,"c1_nullCount":0,"c2_maxValue":" 943sdc","c2_minValue":" 200sdc","c2_nullCount":0,"c3_maxValue":854.690,"c3_minValue":100.556,"c3_nullCount":0,"c4_maxValue":"2021-11-19T20:40:55.549-08:00","c4_minValue":"2021-11-19T20:40:55.508-08:00","c4_nullCount":0,"c5_maxValue":95,"c5_minValue":10,"c5_nullCount":0,"c6_maxValue":"2020-10-10","c6_minValue":"2020-01-10","c6_nullCount":0,"c7_maxValue":"yA==","c7_minValue":"LA==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":10} +{"c1_maxValue":959,"c1_minValue":74,"c1_nullCount":0,"c2_maxValue":" 959sdc","c2_minValue":" 
181sdc","c2_nullCount":0,"c3_maxValue":980.213,"c3_minValue":38.740,"c3_nullCount":0,"c4_maxValue":"2021-11-19T20:40:55.550-08:00","c4_minValue":"2021-11-19T20:40:55.507-08:00","c4_nullCount":0,"c5_maxValue":97,"c5_minValue":9,"c5_nullCount":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-01-23","c6_nullCount":0,"c7_maxValue":"1Q==","c7_minValue":"Kw==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":13} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/updated-column-stats-index-table.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/updated-column-stats-index-table.json new file mode 100644 index 0000000000000..bac789913dea0 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/updated-column-stats-index-table.json @@ -0,0 +1,8 @@ +{"c1_maxValue":568,"c1_minValue":8,"c1_nullCount":0,"c2_maxValue":" 8sdc","c2_minValue":" 111sdc","c2_nullCount":0,"c3_maxValue":979.272,"c3_minValue":82.111,"c3_nullCount":0,"c4_maxValue":"2021-11-18T23:34:44.193-08:00","c4_minValue":"2021-11-18T23:34:44.159-08:00","c4_nullCount":0,"c5_maxValue":58,"c5_minValue":2,"c5_nullCount":0,"c6_maxValue":"2020-11-08","c6_minValue":"2020-01-01","c6_nullCount":0,"c7_maxValue":"9g==","c7_minValue":"Ag==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":15} +{"c1_maxValue":715,"c1_minValue":76,"c1_nullCount":0,"c2_maxValue":" 76sdc","c2_minValue":" 224sdc","c2_nullCount":0,"c3_maxValue":958.579,"c3_minValue":246.427,"c3_nullCount":0,"c4_maxValue":"2021-11-18T23:34:44.199-08:00","c4_minValue":"2021-11-18T23:34:44.166-08:00","c4_nullCount":0,"c5_maxValue":73,"c5_minValue":9,"c5_nullCount":0,"c6_maxValue":"2020-11-21","c6_minValue":"2020-01-16","c6_nullCount":0,"c7_maxValue":"+g==","c7_minValue":"LA==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":12} 
+{"c1_maxValue":768,"c1_minValue":59,"c1_nullCount":0,"c2_maxValue":" 768sdc","c2_minValue":" 118sdc","c2_nullCount":0,"c3_maxValue":959.131,"c3_minValue":64.768,"c3_nullCount":0,"c4_maxValue":"2021-11-18T23:34:44.201-08:00","c4_minValue":"2021-11-18T23:34:44.164-08:00","c4_nullCount":0,"c5_maxValue":78,"c5_minValue":7,"c5_nullCount":0,"c6_maxValue":"2020-11-20","c6_minValue":"2020-05-04","c6_nullCount":0,"c7_maxValue":"zw==","c7_minValue":"AA==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":7} +{"c1_maxValue":769,"c1_minValue":309,"c1_nullCount":0,"c2_maxValue":" 769sdc","c2_minValue":" 309sdc","c2_nullCount":0,"c3_maxValue":919.769,"c3_minValue":76.430,"c3_nullCount":0,"c4_maxValue":"2021-11-19T20:40:55.543-08:00","c4_minValue":"2021-11-19T20:40:55.521-08:00","c4_nullCount":0,"c5_maxValue":78,"c5_minValue":32,"c5_nullCount":0,"c6_maxValue":"2020-11-14","c6_minValue":"2020-01-08","c6_nullCount":0,"c7_maxValue":"uQ==","c7_minValue":"AQ==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":9} +{"c1_maxValue":770,"c1_minValue":129,"c1_nullCount":0,"c2_maxValue":" 770sdc","c2_minValue":" 129sdc","c2_nullCount":0,"c3_maxValue":977.328,"c3_minValue":153.431,"c3_nullCount":0,"c4_maxValue":"2021-11-18T23:34:44.201-08:00","c4_minValue":"2021-11-18T23:34:44.169-08:00","c4_nullCount":0,"c5_maxValue":78,"c5_minValue":14,"c5_nullCount":0,"c6_maxValue":"2020-10-21","c6_minValue":"2020-01-15","c6_nullCount":0,"c7_maxValue":"rw==","c7_minValue":"Ag==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":6} +{"c1_maxValue":932,"c1_minValue":0,"c1_nullCount":0,"c2_maxValue":" 932sdc","c2_minValue":" 
0sdc","c2_nullCount":0,"c3_maxValue":994.355,"c3_minValue":19.000,"c3_nullCount":0,"c4_maxValue":"2021-11-19T20:40:55.549-08:00","c4_minValue":"2021-11-19T20:40:55.339-08:00","c4_nullCount":0,"c5_maxValue":94,"c5_minValue":1,"c5_nullCount":0,"c6_maxValue":"2020-09-09","c6_minValue":"2020-01-01","c6_nullCount":0,"c7_maxValue":"xw==","c7_minValue":"AA==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":8} +{"c1_maxValue":943,"c1_minValue":89,"c1_nullCount":0,"c2_maxValue":" 943sdc","c2_minValue":" 200sdc","c2_nullCount":0,"c3_maxValue":854.690,"c3_minValue":100.556,"c3_nullCount":0,"c4_maxValue":"2021-11-19T20:40:55.549-08:00","c4_minValue":"2021-11-19T20:40:55.508-08:00","c4_nullCount":0,"c5_maxValue":95,"c5_minValue":10,"c5_nullCount":0,"c6_maxValue":"2020-10-10","c6_minValue":"2020-01-10","c6_nullCount":0,"c7_maxValue":"yA==","c7_minValue":"LA==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":10} +{"c1_maxValue":959,"c1_minValue":74,"c1_nullCount":0,"c2_maxValue":" 959sdc","c2_minValue":" 181sdc","c2_nullCount":0,"c3_maxValue":980.213,"c3_minValue":38.740,"c3_nullCount":0,"c4_maxValue":"2021-11-19T20:40:55.550-08:00","c4_minValue":"2021-11-19T20:40:55.507-08:00","c4_nullCount":0,"c5_maxValue":97,"c5_minValue":9,"c5_nullCount":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-01-23","c6_nullCount":0,"c7_maxValue":"1Q==","c7_minValue":"Kw==","c7_nullCount":0,"c8_maxValue":9,"c8_minValue":9,"c8_nullCount":0,"valueCount":13} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table-merged.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table-merged.json deleted file mode 100644 index 00d16c660c503..0000000000000 --- a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table-merged.json +++ /dev/null @@ -1,8 +0,0 @@ -{"c1_maxValue":272,"c1_minValue":8,"c1_num_nulls":0,"c2_maxValue":" 
8sdc","c2_minValue":" 129sdc","c2_num_nulls":0,"c3_maxValue":979.272,"c3_minValue":430.129,"c3_num_nulls":0,"c5_maxValue":28,"c5_minValue":2,"c5_num_nulls":0,"c6_maxValue":"2020-11-20","c6_minValue":"2020-03-23","c6_num_nulls":0,"c7_maxValue":"8A==","c7_minValue":"Ag==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00003-xxx-c000.snappy.parquet"} -{"c1_maxValue":486,"c1_minValue":59,"c1_num_nulls":0,"c2_maxValue":" 79sdc","c2_minValue":" 111sdc","c2_num_nulls":0,"c3_maxValue":771.590,"c3_minValue":82.111,"c3_num_nulls":0,"c5_maxValue":50,"c5_minValue":7,"c5_num_nulls":0,"c6_maxValue":"2020-11-21","c6_minValue":"2020-01-22","c6_num_nulls":0,"c7_maxValue":"5g==","c7_minValue":"Ow==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00002-xxx-c000.snappy.parquet"} -{"c1_maxValue":559,"c1_minValue":74,"c1_num_nulls":0,"c2_maxValue":" 74sdc","c2_minValue":" 181sdc","c2_num_nulls":0,"c3_maxValue":980.213,"c3_minValue":38.740,"c3_num_nulls":0,"c5_maxValue":57,"c5_minValue":9,"c5_num_nulls":0,"c6_maxValue":"2020-11-09","c6_minValue":"2020-01-08","c6_num_nulls":0,"c7_maxValue":"1Q==","c7_minValue":"Gw==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00001-xxx-c000.snappy.parquet"} -{"c1_maxValue":639,"c1_minValue":323,"c1_num_nulls":0,"c2_maxValue":" 639sdc","c2_minValue":" 323sdc","c2_num_nulls":0,"c3_maxValue":811.638,"c3_minValue":100.556,"c3_num_nulls":0,"c5_maxValue":65,"c5_minValue":33,"c5_num_nulls":0,"c6_maxValue":"2020-09-09","c6_minValue":"2020-01-23","c6_num_nulls":0,"c7_maxValue":"fw==","c7_minValue":"Kw==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00000-xxx-c000.snappy.parquet"} -{"c1_maxValue":719,"c1_minValue":125,"c1_num_nulls":0,"c2_maxValue":" 719sdc","c2_minValue":" 
125sdc","c2_num_nulls":0,"c3_maxValue":958.579,"c3_minValue":153.125,"c3_num_nulls":0,"c5_maxValue":73,"c5_minValue":14,"c5_num_nulls":0,"c6_maxValue":"2020-09-27","c6_minValue":"2020-01-16","c6_num_nulls":0,"c7_maxValue":"+g==","c7_minValue":"OA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00001-xxx-c000.snappy.parquet"} -{"c1_maxValue":770,"c1_minValue":300,"c1_num_nulls":0,"c2_maxValue":" 770sdc","c2_minValue":" 300sdc","c2_num_nulls":0,"c3_maxValue":977.328,"c3_minValue":64.768,"c3_num_nulls":0,"c5_maxValue":78,"c5_minValue":31,"c5_num_nulls":0,"c6_maxValue":"2020-10-21","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"rw==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00000-xxx-c000.snappy.parquet"} -{"c1_maxValue":945,"c1_minValue":355,"c1_num_nulls":0,"c2_maxValue":" 945sdc","c2_minValue":" 355sdc","c2_num_nulls":0,"c3_maxValue":994.355,"c3_minValue":374.882,"c3_num_nulls":0,"c5_maxValue":96,"c5_minValue":37,"c5_num_nulls":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-02-25","c6_num_nulls":0,"c7_maxValue":"sQ==","c7_minValue":"AQ==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00002-xxx-c000.snappy.parquet"} -{"c1_maxValue":959,"c1_minValue":0,"c1_num_nulls":0,"c2_maxValue":" 959sdc","c2_minValue":" 0sdc","c2_num_nulls":0,"c3_maxValue":916.697,"c3_minValue":19.000,"c3_num_nulls":0,"c5_maxValue":97,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"yA==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00003-xxx-c000.snappy.parquet"} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table.json deleted file mode 100644 index a633e3170e108..0000000000000 --- 
a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table.json +++ /dev/null @@ -1,4 +0,0 @@ -{"c1_maxValue":559,"c1_minValue":74,"c1_num_nulls":0,"c2_maxValue":" 74sdc","c2_minValue":" 181sdc","c2_num_nulls":0,"c3_maxValue":980.213,"c3_minValue":38.740,"c3_num_nulls":0,"c5_maxValue":57,"c5_minValue":9,"c5_num_nulls":0,"c6_maxValue":"2020-11-09","c6_minValue":"2020-01-08","c6_num_nulls":0,"c7_maxValue":"1Q==","c7_minValue":"Gw==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00001-xxx-c000.snappy.parquet"} -{"c1_maxValue":639,"c1_minValue":323,"c1_num_nulls":0,"c2_maxValue":" 639sdc","c2_minValue":" 323sdc","c2_num_nulls":0,"c3_maxValue":811.638,"c3_minValue":100.556,"c3_num_nulls":0,"c5_maxValue":65,"c5_minValue":33,"c5_num_nulls":0,"c6_maxValue":"2020-09-09","c6_minValue":"2020-01-23","c6_num_nulls":0,"c7_maxValue":"fw==","c7_minValue":"Kw==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00000-xxx-c000.snappy.parquet"} -{"c1_maxValue":945,"c1_minValue":355,"c1_num_nulls":0,"c2_maxValue":" 945sdc","c2_minValue":" 355sdc","c2_num_nulls":0,"c3_maxValue":994.355,"c3_minValue":374.882,"c3_num_nulls":0,"c5_maxValue":96,"c5_minValue":37,"c5_num_nulls":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-02-25","c6_num_nulls":0,"c7_maxValue":"sQ==","c7_minValue":"AQ==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00002-xxx-c000.snappy.parquet"} -{"c1_maxValue":959,"c1_minValue":0,"c1_num_nulls":0,"c2_maxValue":" 959sdc","c2_minValue":" 0sdc","c2_num_nulls":0,"c3_maxValue":916.697,"c3_minValue":19.000,"c3_num_nulls":0,"c5_maxValue":97,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"yA==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00003-xxx-c000.snappy.parquet"} \ No newline at end of file diff --git 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSkippingUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSkippingUtils.scala index 6b96472d4ce81..e0e5cb2666787 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSkippingUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSkippingUtils.scala @@ -17,33 +17,47 @@ package org.apache.hudi -import org.apache.hudi.index.columnstats.ColumnStatsIndexHelper +import org.apache.hudi.ColumnStatsIndexSupport.composeIndexSchema import org.apache.hudi.testutils.HoodieClientTestBase import org.apache.spark.sql.catalyst.expressions.{Expression, Not} -import org.apache.spark.sql.functions.col +import org.apache.spark.sql.functions.{col, lower} import org.apache.spark.sql.hudi.DataSkippingUtils -import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType, VarcharType} -import org.apache.spark.sql.{Column, HoodieCatalystExpressionUtils, SparkSession} +import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{Column, HoodieCatalystExpressionUtils, Row, SparkSession} import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments.arguments import org.junit.jupiter.params.provider.{Arguments, MethodSource} +import java.sql.Timestamp import scala.collection.JavaConverters._ // NOTE: Only A, B columns are indexed -case class IndexRow( - file: String, - A_minValue: Long, - A_maxValue: Long, - A_num_nulls: Long, - B_minValue: String = null, - B_maxValue: String = null, - B_num_nulls: Long = -1 -) +case class IndexRow(fileName: String, + valueCount: Long = 1, -class TestDataSkippingUtils extends HoodieClientTestBase { + // Corresponding A column is LongType + A_minValue: Long = -1, + 
A_maxValue: Long = -1, + A_nullCount: Long = -1, + + // Corresponding B column is StringType + B_minValue: String = null, + B_maxValue: String = null, + B_nullCount: Long = -1, + + // Corresponding B column is TimestampType + C_minValue: Timestamp = null, + C_maxValue: Timestamp = null, + C_nullCount: Long = -1) { + def toRow: Row = Row(productIterator.toSeq: _*) +} + +class TestDataSkippingUtils extends HoodieClientTestBase with SparkAdapterSupport { + + val exprUtils: HoodieCatalystExpressionUtils = sparkAdapter.createCatalystExpressionUtils() var spark: SparkSession = _ @@ -53,36 +67,38 @@ class TestDataSkippingUtils extends HoodieClientTestBase { spark = sqlContext.sparkSession } - val indexedCols = Seq("A", "B") - val sourceTableSchema = + val indexedCols: Seq[String] = Seq("A", "B", "C") + val sourceTableSchema: StructType = StructType( Seq( StructField("A", LongType), StructField("B", StringType), - StructField("C", VarcharType(32)) + StructField("C", TimestampType), + StructField("D", VarcharType(32)) ) ) - val indexSchema = - ColumnStatsIndexHelper.composeIndexSchema( - sourceTableSchema.fields.toSeq - .filter(f => indexedCols.contains(f.name)) - .asJava - ) + val indexSchema: StructType = composeIndexSchema(indexedCols, sourceTableSchema) @ParameterizedTest - @MethodSource(Array("testBaseLookupFilterExpressionsSource", "testAdvancedLookupFilterExpressionsSource")) + @MethodSource( + Array( + "testBasicLookupFilterExpressionsSource", + "testAdvancedLookupFilterExpressionsSource", + "testCompositeFilterExpressionsSource" + )) def testLookupFilterExpressions(sourceExpr: String, input: Seq[IndexRow], output: Seq[String]): Unit = { - val resolvedExpr: Expression = HoodieCatalystExpressionUtils.resolveFilterExpr(spark, sourceExpr, sourceTableSchema) - val lookupFilter = DataSkippingUtils.translateIntoColumnStatsIndexFilterExpr(resolvedExpr, indexSchema) + // We have to fix the timezone to make sure all date-bound utilities output + // is consistent with the 
fixtures + spark.sqlContext.setConf(SESSION_LOCAL_TIMEZONE.key, "UTC") - val spark2 = spark - import spark2.implicits._ + val resolvedExpr: Expression = exprUtils.resolveExpr(spark, sourceExpr, sourceTableSchema) + val lookupFilter = DataSkippingUtils.translateIntoColumnStatsIndexFilterExpr(resolvedExpr, indexSchema) - val indexDf = spark.createDataset(input) + val indexDf = spark.createDataFrame(input.map(_.toRow).asJava, indexSchema) val rows = indexDf.where(new Column(lookupFilter)) - .select("file") + .select("fileName") .collect() .map(_.getString(0)) .toSeq @@ -93,7 +109,7 @@ class TestDataSkippingUtils extends HoodieClientTestBase { @ParameterizedTest @MethodSource(Array("testStringsLookupFilterExpressionsSource")) def testStringsLookupFilterExpressions(sourceExpr: Expression, input: Seq[IndexRow], output: Seq[String]): Unit = { - val resolvedExpr = HoodieCatalystExpressionUtils.resolveFilterExpr(spark, sourceExpr, sourceTableSchema) + val resolvedExpr = exprUtils.resolveExpr(spark, sourceExpr, sourceTableSchema) val lookupFilter = DataSkippingUtils.translateIntoColumnStatsIndexFilterExpr(resolvedExpr, indexSchema) val spark2 = spark @@ -102,7 +118,7 @@ class TestDataSkippingUtils extends HoodieClientTestBase { val indexDf = spark.createDataset(input) val rows = indexDf.where(new Column(lookupFilter)) - .select("file") + .select("fileName") .collect() .map(_.getString(0)) .toSeq @@ -117,151 +133,185 @@ object TestDataSkippingUtils { arguments( col("B").startsWith("abc").expr, Seq( - IndexRow("file_1", 0, 0, 0, "aba", "adf", 1), // may contain strings starting w/ "abc" - IndexRow("file_2", 0, 0, 0, "adf", "azy", 0), - IndexRow("file_3", 0, 0, 0, "aaa", "aba", 0) + IndexRow("file_1", valueCount = 1, B_minValue = "aba", B_maxValue = "adf", B_nullCount = 1), // may contain strings starting w/ "abc" + IndexRow("file_2", valueCount = 1, B_minValue = "adf", B_maxValue = "azy", B_nullCount = 0), + IndexRow("file_3", valueCount = 1, B_minValue = "aaa", B_maxValue = 
"aba", B_nullCount = 0) ), Seq("file_1")), arguments( Not(col("B").startsWith("abc").expr), Seq( - IndexRow("file_1", 0, 0, 0, "aba", "adf", 1), // may contain strings starting w/ "abc" - IndexRow("file_2", 0, 0, 0, "adf", "azy", 0), - IndexRow("file_3", 0, 0, 0, "aaa", "aba", 0), - IndexRow("file_4", 0, 0, 0, "abc123", "abc345", 0) // all strings start w/ "abc" + IndexRow("file_1", valueCount = 1, B_minValue = "aba", B_maxValue = "adf", B_nullCount = 1), // may contain strings starting w/ "abc" + IndexRow("file_2", valueCount = 1, B_minValue = "adf", B_maxValue = "azy", B_nullCount = 0), + IndexRow("file_3", valueCount = 1, B_minValue = "aaa", B_maxValue = "aba", B_nullCount = 0), + IndexRow("file_4", valueCount = 1, B_minValue = "abc123", B_maxValue = "abc345", B_nullCount = 0) // all strings start w/ "abc" + ), + Seq("file_1", "file_2", "file_3")), + arguments( + // Composite expression + Not(lower(col("B")).startsWith("abc").expr), + Seq( + IndexRow("file_1", valueCount = 1, B_minValue = "ABA", B_maxValue = "ADF", B_nullCount = 1), // may contain strings starting w/ "ABC" (after upper) + IndexRow("file_2", valueCount = 1, B_minValue = "ADF", B_maxValue = "AZY", B_nullCount = 0), + IndexRow("file_3", valueCount = 1, B_minValue = "AAA", B_maxValue = "ABA", B_nullCount = 0), + IndexRow("file_4", valueCount = 1, B_minValue = "ABC123", B_maxValue = "ABC345", B_nullCount = 0) // all strings start w/ "ABC" (after upper) ), Seq("file_1", "file_2", "file_3")) ) } - def testBaseLookupFilterExpressionsSource(): java.util.stream.Stream[Arguments] = { + def testBasicLookupFilterExpressionsSource(): java.util.stream.Stream[Arguments] = { java.util.stream.Stream.of( // TODO cases // A = null arguments( "A = 0", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0) ), Seq("file_2")), arguments( "0 = A", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0) + 
IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0) ), Seq("file_2")), arguments( "A != 0", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", 0, 0, 0) // Contains only 0s + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, 0, 0, 0) // Contains only 0s ), Seq("file_1", "file_2")), arguments( "0 != A", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", 0, 0, 0) // Contains only 0s + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, 0, 0, 0) // Contains only 0s ), Seq("file_1", "file_2")), arguments( "A < 0", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_2", "file_3")), arguments( "0 > A", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_2", "file_3")), arguments( "A > 0", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_1", "file_2")), arguments( "0 < A", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_1", "file_2")), arguments( "A <= -1", Seq( - IndexRow("file_1", 1, 2, 0), - 
IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_2", "file_3")), arguments( "-1 >= A", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_2", "file_3")), arguments( "A >= 1", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_1", "file_2")), arguments( "1 <= A", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_1", "file_2")), arguments( "A is null", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 1) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 1) ), Seq("file_2")), arguments( "A is not null", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 1) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 2, -1, 1, 1) // might still contain non-null values (if nullCount < valueCount) + ), + Seq("file_1", "file_2")), + arguments( + "A is not null", + Seq( + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 1) // might NOT contain non-null values (nullCount == valueCount) ), Seq("file_1")), arguments( "A in (0, 1)", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0) + IndexRow("file_1", valueCount = 1, 1, 2, 0), + 
IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_1", "file_2")), arguments( "A not in (0, 1)", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0), - IndexRow("file_4", 0, 0, 0), // only contains 0 - IndexRow("file_5", 1, 1, 0) // only contains 1 + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0), + IndexRow("file_4", valueCount = 1, 0, 0, 0), // only contains 0 + IndexRow("file_5", valueCount = 1, 1, 1, 0) // only contains 1 + ), + Seq("file_1", "file_2", "file_3")), + arguments( + // Value expression containing expression, which isn't a literal + "A = int('0')", + Seq( + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0) + ), + Seq("file_2")), + arguments( + // Value expression containing reference to the other attribute (column), fallback + "A = D", + Seq( + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0) ), Seq("file_1", "file_2", "file_3")) ) @@ -273,69 +323,272 @@ object TestDataSkippingUtils { // Filter out all rows that contain either A = 0 OR A = 1 "A != 0 AND A != 1", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0), - IndexRow("file_4", 0, 0, 0), // only contains 0 - IndexRow("file_5", 1, 1, 0) // only contains 1 + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0), + IndexRow("file_4", valueCount = 1, 0, 0, 0), // only contains 0 + IndexRow("file_5", valueCount = 1, 1, 1, 0) // only contains 1 ), Seq("file_1", "file_2", "file_3")), arguments( // This is an equivalent to the above expression "NOT(A = 0 OR A = 1)", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - 
IndexRow("file_3", -2, -1, 0), - IndexRow("file_4", 0, 0, 0), // only contains 0 - IndexRow("file_5", 1, 1, 0) // only contains 1 + IndexRow("file_1", valueCount = 1, 1, 2, 0), + IndexRow("file_2", valueCount = 1, -1, 1, 0), + IndexRow("file_3", valueCount = 1, -2, -1, 0), + IndexRow("file_4", valueCount = 1, 0, 0, 0), // only contains 0 + IndexRow("file_5", valueCount = 1, 1, 1, 0) // only contains 1 ), Seq("file_1", "file_2", "file_3")), arguments( // Filter out all rows that contain A = 0 AND B = 'abc' - "A != 0 OR B != 'abc'", + "A != 0 OR B != 'abc'", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0), - IndexRow("file_4", 0, 0, 0, "abc", "abc", 0), // only contains A = 0, B = 'abc' - IndexRow("file_5", 0, 0, 0, "abc", "abc", 0) // only contains A = 0, B = 'abc' + IndexRow("file_1", valueCount = 1, A_minValue = 1, A_maxValue = 2, A_nullCount = 0), + IndexRow("file_2", valueCount = 1, A_minValue = -1, A_maxValue = 1, A_nullCount = 0), + IndexRow("file_3", valueCount = 1, A_minValue = -2, A_maxValue = -1, A_nullCount = 0), + IndexRow("file_4", valueCount = 1, A_minValue = 0, A_maxValue = 0, A_nullCount = 0, B_minValue = "abc", B_maxValue = "abc", B_nullCount = 0), // only contains A = 0, B = 'abc' + IndexRow("file_5", valueCount = 1, A_minValue = 0, A_maxValue = 0, A_nullCount = 0, B_minValue = "abc", B_maxValue = "abc", B_nullCount = 0) // only contains A = 0, B = 'abc' ), Seq("file_1", "file_2", "file_3")), arguments( // This is an equivalent to the above expression "NOT(A = 0 AND B = 'abc')", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0), - IndexRow("file_4", 0, 0, 0, "abc", "abc", 0), // only contains A = 0, B = 'abc' - IndexRow("file_5", 0, 0, 0, "abc", "abc", 0) // only contains A = 0, B = 'abc' + IndexRow("file_1", valueCount = 1, A_minValue = 1, A_maxValue = 2, A_nullCount = 0), + IndexRow("file_2", valueCount = 1, A_minValue = -1, A_maxValue = 1, 
A_nullCount = 0), + IndexRow("file_3", valueCount = 1, A_minValue = -2, A_maxValue = -1, A_nullCount = 0), + IndexRow("file_4", valueCount = 1, A_minValue = 0, A_maxValue = 0, A_nullCount = 0, B_minValue = "abc", B_maxValue = "abc", B_nullCount = 0), // only contains A = 0, B = 'abc' + IndexRow("file_5", valueCount = 1, A_minValue = 0, A_maxValue = 0, A_nullCount = 0, B_minValue = "abc", B_maxValue = "abc", B_nullCount = 0) // only contains A = 0, B = 'abc' ), Seq("file_1", "file_2", "file_3")), arguments( - // Queries contains expression involving non-indexed column C - "A = 0 AND B = 'abc' AND C = '...'", + // Queries contains expression involving non-indexed column D + "A = 0 AND B = 'abc' AND D IS NULL", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0), - IndexRow("file_4", 0, 0, 0, "aaa", "xyz", 0) // might contain A = 0 AND B = 'abc' + IndexRow("file_1", valueCount = 1, A_minValue = 1, A_maxValue = 2, A_nullCount = 0), + IndexRow("file_2", valueCount = 1, A_minValue = -1, A_maxValue = 1, A_nullCount = 0), + IndexRow("file_3", valueCount = 1, A_minValue = -2, A_maxValue = -1, A_nullCount = 0), + IndexRow("file_4", valueCount = 1, A_minValue = 0, A_maxValue = 0, A_nullCount = 0, B_minValue = "aaa", B_maxValue = "xyz", B_nullCount = 0) // might contain A = 0 AND B = 'abc' ), Seq("file_4")), arguments( - // Queries contains expression involving non-indexed column C - "A = 0 OR B = 'abc' OR C = '...'", + // Queries contains expression involving non-indexed column D + "A = 0 OR B = 'abc' OR D IS NULL", Seq( - IndexRow("file_1", 1, 2, 0), - IndexRow("file_2", -1, 1, 0), - IndexRow("file_3", -2, -1, 0), - IndexRow("file_4", 0, 0, 0, "aaa", "xyz", 0) // might contain B = 'abc' + IndexRow("file_1", valueCount = 1, A_minValue = 1, A_maxValue = 2, A_nullCount = 0), + IndexRow("file_2", valueCount = 1, A_minValue = -1, A_maxValue = 1, A_nullCount = 0), + IndexRow("file_3", valueCount = 1, A_minValue = -2, A_maxValue = -1, 
A_nullCount = 0), + IndexRow("file_4", valueCount = 1, B_minValue = "aaa", B_maxValue = "xyz", B_nullCount = 0) // might contain B = 'abc' ), Seq("file_1", "file_2", "file_3", "file_4")) ) } + + def testCompositeFilterExpressionsSource(): java.util.stream.Stream[Arguments] = { + // NOTE: all timestamps in UTC + java.util.stream.Stream.of( + arguments( + "date_format(C, 'MM/dd/yyyy') = '03/07/2022'", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_2")), + arguments( + "'03/07/2022' = date_format(C, 'MM/dd/yyyy')", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_2")), + arguments( + "'03/07/2022' != date_format(C, 'MM/dd/yyyy')", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646625048000L), // 03/07/2022 + C_nullCount = 0) + ), + Seq("file_1")), + arguments( + "date_format(C, 'MM/dd/yyyy') != '03/07/2022'", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new 
Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646625048000L), // 03/07/2022 + C_nullCount = 0) + ), + Seq("file_1")), + arguments( + "date_format(C, 'MM/dd/yyyy') < '03/08/2022'", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_2")), + arguments( + "'03/08/2022' > date_format(C, 'MM/dd/yyyy')", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_2")), + arguments( + "'03/08/2022' < date_format(C, 'MM/dd/yyyy')", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_1")), + arguments( + "date_format(C, 'MM/dd/yyyy') > '03/08/2022'", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_1")), + arguments( + "date_format(C, 'MM/dd/yyyy') <= '03/07/2022'", + Seq( + 
IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_2")), + arguments( + "'03/07/2022' >= date_format(C, 'MM/dd/yyyy')", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_2")), + arguments( + "'03/09/2022' <= date_format(C, 'MM/dd/yyyy')", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_1")), + arguments( + "date_format(C, 'MM/dd/yyyy') >= '03/09/2022'", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_1")), + arguments( + "date_format(C, 'MM/dd/yyyy') IN ('03/09/2022')", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + 
C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646711448000L), // 03/08/2022 + C_nullCount = 0) + ), + Seq("file_1")), + arguments( + "date_format(C, 'MM/dd/yyyy') NOT IN ('03/07/2022')", + Seq( + IndexRow("file_1", valueCount = 1, + C_minValue = new Timestamp(1646711448000L), // 03/08/2022 + C_maxValue = new Timestamp(1646797848000L), // 03/09/2022 + C_nullCount = 0), + IndexRow("file_2", valueCount = 1, + C_minValue = new Timestamp(1646625048000L), // 03/07/2022 + C_maxValue = new Timestamp(1646625048000L), // 03/07/2022 + C_nullCount = 0) + ), + Seq("file_1")), + arguments( + // Should be identical to the one above + "date_format(to_timestamp(B, 'yyyy-MM-dd'), 'MM/dd/yyyy') NOT IN ('03/06/2022')", + Seq( + IndexRow("file_1", valueCount = 1, + B_minValue = "2022-03-07", // 03/07/2022 + B_maxValue = "2022-03-08", // 03/08/2022 + B_nullCount = 0), + IndexRow("file_2", valueCount = 1, + B_minValue = "2022-03-06", // 03/06/2022 + B_maxValue = "2022-03-06", // 03/06/2022 + B_nullCount = 0) + ), + Seq("file_1")) + + ) + } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala index 899fc4cc2a6c1..feed6fd334062 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala @@ -20,6 +20,7 @@ package org.apache.hudi import org.apache.hadoop.conf.Configuration import org.apache.hudi.DataSourceReadOptions.{QUERY_TYPE, QUERY_TYPE_SNAPSHOT_OPT_VAL} import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.HoodieFileIndex.DataSkippingFailureMode import org.apache.hudi.client.HoodieJavaWriteClient import org.apache.hudi.client.common.HoodieJavaEngineContext import org.apache.hudi.common.config.HoodieMetadataConfig @@ -350,10 +351,12 @@ class 
TestHoodieFileIndex extends HoodieClientTestBase { PRECOMBINE_FIELD.key -> "id", HoodieMetadataConfig.ENABLE.key -> "true", HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS.key -> "true", HoodieTableConfig.POPULATE_META_FIELDS.key -> "true" ) + // If there are any failures in the Data Skipping flow, test should fail + spark.sqlContext.setConf(DataSkippingFailureMode.configName, DataSkippingFailureMode.Strict.value); + inputDF.repartition(4) .write .format("hudi") @@ -368,7 +371,10 @@ class TestHoodieFileIndex extends HoodieClientTestBase { val props = Map[String, String]( "path" -> basePath, QUERY_TYPE.key -> QUERY_TYPE_SNAPSHOT_OPT_VAL, - DataSourceReadOptions.ENABLE_DATA_SKIPPING.key -> "true" + DataSourceReadOptions.ENABLE_DATA_SKIPPING.key -> "true", + // NOTE: Metadata Table has to be enabled on the read path as well + HoodieMetadataConfig.ENABLE.key -> "true", + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true" ) val fileIndex = HoodieFileIndex(spark, metaClient, Option.empty, props, NoopCache) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala index c14d0bb063d7b..111a46261c769 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala @@ -17,11 +17,13 @@ package org.apache.hudi +import java.io.IOException +import java.time.Instant +import java.util.{Collections, Date, UUID} + import org.apache.commons.io.FileUtils -import org.apache.hadoop.fs.Path import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.client.SparkRDDWriteClient -import org.apache.hudi.common.config.HoodieConfig import org.apache.hudi.common.model._ import 
org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.testutils.HoodieTestDataGenerator @@ -29,7 +31,6 @@ import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieWriteConfig} import org.apache.hudi.exception.HoodieException import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode import org.apache.hudi.functional.TestBootstrap -import org.apache.hudi.hive.HiveSyncConfig import org.apache.hudi.keygen.{ComplexKeyGenerator, NonpartitionedKeyGenerator, SimpleKeyGenerator} import org.apache.hudi.testutils.DataSourceTestUtils import org.apache.spark.api.java.JavaSparkContext @@ -37,7 +38,6 @@ import org.apache.spark.sql._ import org.apache.spark.sql.functions.{expr, lit} import org.apache.spark.sql.hudi.HoodieSparkSessionExtension import org.apache.spark.sql.hudi.command.SqlKeyGenerator -import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.{SparkConf, SparkContext} import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue, fail} import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} @@ -46,11 +46,8 @@ import org.junit.jupiter.params.provider.{CsvSource, EnumSource, ValueSource} import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{spy, times, verify} import org.scalatest.Assertions.assertThrows -import org.scalatest.Matchers.{assertResult, be, convertToAnyShouldWrapper, intercept} +import org.scalatest.Matchers.{be, convertToAnyShouldWrapper, intercept} -import java.io.IOException -import java.time.Instant -import java.util.{Collections, Date, UUID} import scala.collection.JavaConversions._ import scala.collection.JavaConverters @@ -887,6 +884,139 @@ class TestHoodieSparkSqlWriter { assert(data.select("_hoodie_partition_path").map(_.getString(0)).distinct.collect.head == "2021-10-16") } + @Test + def testNonpartitonedToDefaultKeyGen(): Unit = { + val _spark = spark + import _spark.implicits._ + val df = Seq((1, "a1", 10, 
1000, "2021-10-16")).toDF("id", "name", "value", "ts", "dt")
+ val options = Map(
+ DataSourceWriteOptions.RECORDKEY_FIELD.key -> "id",
+ DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "ts",
+ DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "dt"
+ )
+
+ // case 1: When commit C1 specifies a key generator and commit C2 does not specify key generator
+ val (tableName1, tablePath1) = ("hoodie_test_params_1", s"$tempBasePath" + "_1")
+
+ // the first write need to specify KEYGENERATOR_CLASS_NAME params
+ df.write.format("hudi")
+ .options(options)
+ .option(HoodieWriteConfig.TBL_NAME.key, tableName1)
+ .option(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key, classOf[NonpartitionedKeyGenerator].getName)
+ .mode(SaveMode.Overwrite).save(tablePath1)
+
+ val df2 = Seq((2, "a2", 20, 1000, "2021-10-16")).toDF("id", "name", "value", "ts", "dt")
+ // raise exception when no KEYGENERATOR_CLASS_NAME is specified and it is expected to default to SimpleKeyGenerator
+ val configConflictException = intercept[HoodieException] {
+ df2.write.format("hudi")
+ .options(options)
+ .option(HoodieWriteConfig.TBL_NAME.key, tableName1)
+ .mode(SaveMode.Append).save(tablePath1)
+ }
+ assert(configConflictException.getMessage.contains("Config conflict"))
+ assert(configConflictException.getMessage.contains(s"KeyGenerator:\t${classOf[SimpleKeyGenerator].getName}\t${classOf[NonpartitionedKeyGenerator].getName}"))
+ }
+
+ @Test
+ def testDefaultKeyGenToNonpartitoned(): Unit = {
+ val _spark = spark
+ import _spark.implicits._
+ val df = Seq((1, "a1", 10, 1000, "2021-10-16")).toDF("id", "name", "value", "ts", "dt")
+ val options = Map(
+ DataSourceWriteOptions.RECORDKEY_FIELD.key -> "id",
+ DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "ts",
+ DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "dt"
+ )
+
+ // case 1: When commit C1 does not specify key generator and commit C2 specifies a key generator
+ val (tableName1, tablePath1) = ("hoodie_test_params_1", s"$tempBasePath" + "_1")
+
+ // the 
first write need to specify KEYGENERATOR_CLASS_NAME params
+ df.write.format("hudi")
+ .options(options)
+ .option(HoodieWriteConfig.TBL_NAME.key, tableName1)
+ .mode(SaveMode.Overwrite).save(tablePath1)
+
+ val df2 = Seq((2, "a2", 20, 1000, "2021-10-16")).toDF("id", "name", "value", "ts", "dt")
+ // raise exception when NonpartitionedKeyGenerator is specified
+ val configConflictException = intercept[HoodieException] {
+ df2.write.format("hudi")
+ .options(options)
+ .option(HoodieWriteConfig.TBL_NAME.key, tableName1)
+ .option(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key, classOf[NonpartitionedKeyGenerator].getName)
+ .mode(SaveMode.Append).save(tablePath1)
+ }
+ assert(configConflictException.getMessage.contains("Config conflict"))
+ assert(configConflictException.getMessage.contains(s"KeyGenerator:\t${classOf[NonpartitionedKeyGenerator].getName}\t${classOf[SimpleKeyGenerator].getName}"))
+ }
+
+
+ @Test
+ def testNoKeyGenToSimpleKeyGen(): Unit = {
+ val _spark = spark
+ import _spark.implicits._
+ val df = Seq((1, "a1", 10, 1000, "2021-10-16")).toDF("id", "name", "value", "ts", "dt")
+ val options = Map(
+ DataSourceWriteOptions.RECORDKEY_FIELD.key -> "id",
+ DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "ts",
+ DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "dt"
+ )
+
+ // case 1: When commit C1 specifies a key generator and commit C2 does not specify key generator
+ val (tableName1, tablePath1) = ("hoodie_test_params_1", s"$tempBasePath" + "_1")
+
+ // the first write need to specify KEYGENERATOR_CLASS_NAME params
+ df.write.format("hudi")
+ .options(options)
+ .option(HoodieWriteConfig.TBL_NAME.key, tableName1)
+ .mode(SaveMode.Overwrite).save(tablePath1)
+
+ val df2 = Seq((2, "a2", 20, 1000, "2021-10-16")).toDF("id", "name", "value", "ts", "dt")
+ // No Exception Should be raised
+ try {
+ df2.write.format("hudi")
+ .options(options)
+ .option(HoodieWriteConfig.TBL_NAME.key, tableName1)
+ .option(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key, 
classOf[SimpleKeyGenerator].getName)
+ .mode(SaveMode.Append).save(tablePath1)
+ } catch {
+ case _ => fail("Switching from no keygen to explicit SimpleKeyGenerator should not fail");
+ }
+ }
+
+ @Test
+ def testSimpleKeyGenToNoKeyGen(): Unit = {
+ val _spark = spark
+ import _spark.implicits._
+ val df = Seq((1, "a1", 10, 1000, "2021-10-16")).toDF("id", "name", "value", "ts", "dt")
+ val options = Map(
+ DataSourceWriteOptions.RECORDKEY_FIELD.key -> "id",
+ DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "ts",
+ DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "dt"
+ )
+
+ // case 1: When commit C1 specifies a key generator and commit C2 does not specify key generator
+ val (tableName1, tablePath1) = ("hoodie_test_params_1", s"$tempBasePath" + "_1")
+
+ // the first write need to specify KEYGENERATOR_CLASS_NAME params
+ df.write.format("hudi")
+ .options(options)
+ .option(HoodieWriteConfig.TBL_NAME.key, tableName1)
+ .option(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key, classOf[SimpleKeyGenerator].getName)
+ .mode(SaveMode.Overwrite).save(tablePath1)
+
+ val df2 = Seq((2, "a2", 20, 1000, "2021-10-16")).toDF("id", "name", "value", "ts", "dt")
+ // No Exception Should be raised when default keygen is used
+ try {
+ df2.write.format("hudi")
+ .options(options)
+ .option(HoodieWriteConfig.TBL_NAME.key, tableName1)
+ .mode(SaveMode.Append).save(tablePath1)
+ } catch {
+ case _ => fail("Switching from explicit SimpleKeyGenerator to default keygen should not fail");
+ }
+ }
+
 @Test
 def testGetOriginKeyGenerator(): Unit = {
 // for dataframe write
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala
index 39ee6e0fa7187..e71973f94a164 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala
+++ 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala @@ -221,7 +221,7 @@ class TestHoodieSparkUtils { val tableAvroSchema = new Schema.Parser().parse(avroSchemaString) - val (requiredAvroSchema, requiredStructSchema) = + val (requiredAvroSchema, requiredStructSchema, _) = HoodieSparkUtils.getRequiredSchema(tableAvroSchema, Array("ts")) assertEquals("timestamp-millis", diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala index 96d50f6b57b80..000004ace9ad4 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala @@ -21,7 +21,7 @@ import org.apache.hadoop.fs.FileSystem import org.apache.hudi.common.config.HoodieMetadataConfig import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.table.timeline.HoodieInstant -import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.testutils.HoodieTestDataGenerator import org.apache.hudi.common.testutils.RawTripTestPayload.{deleteRecordsToStrings, recordsToStrings} import org.apache.hudi.config.HoodieWriteConfig @@ -29,7 +29,7 @@ import org.apache.hudi.exception.{HoodieException, HoodieUpsertException} import org.apache.hudi.keygen._ import org.apache.hudi.keygen.constant.KeyGeneratorOptions.Config import org.apache.hudi.testutils.HoodieClientTestBase -import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers, HoodieMergeOnReadRDD} +import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers} 
import org.apache.spark.sql._ import org.apache.spark.sql.functions.{col, concat, lit, udf} import org.apache.spark.sql.types._ @@ -56,6 +56,7 @@ class TestCOWDataSource extends HoodieClientTestBase { "hoodie.upsert.shuffle.parallelism" -> "4", "hoodie.bulkinsert.shuffle.parallelism" -> "2", "hoodie.delete.shuffle.parallelism" -> "1", + HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.key() -> "true", DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp", @@ -748,8 +749,17 @@ class TestCOWDataSource extends HoodieClientTestBase { @ParameterizedTest @ValueSource(booleans = Array(true, false)) def testCopyOnWriteWithDropPartitionColumns(enableDropPartitionColumns: Boolean) { - val resultContainPartitionColumn = copyOnWriteTableSelect(enableDropPartitionColumns) - assertEquals(enableDropPartitionColumns, !resultContainPartitionColumn) + val records1 = recordsToStrings(dataGen.generateInsertsContainsAllPartitions("000", 100)).toList + val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) + inputDF1.write.format("org.apache.hudi") + .options(commonOpts) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key, enableDropPartitionColumns) + .mode(SaveMode.Overwrite) + .save(basePath) + val snapshotDF1 = spark.read.format("org.apache.hudi").load(basePath) + assertEquals(snapshotDF1.count(), 100) + assertEquals(3, snapshotDF1.select("partition").distinct().count()) } @Test @@ -862,22 +872,6 @@ class TestCOWDataSource extends HoodieClientTestBase { assertEquals(500, hoodieIncViewDF.count()) } - def copyOnWriteTableSelect(enableDropPartitionColumns: Boolean): Boolean = { - val records1 = recordsToStrings(dataGen.generateInsertsContainsAllPartitions("000", 3)).toList - val inputDF1 = 
spark.read.json(spark.sparkContext.parallelize(records1, 2)) - inputDF1.write.format("org.apache.hudi") - .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key, enableDropPartitionColumns) - .mode(SaveMode.Overwrite) - .save(basePath) - val snapshotDF1 = spark.read.format("org.apache.hudi") - .load(basePath + "/*/*/*/*") - snapshotDF1.registerTempTable("tmptable") - val result = spark.sql("select * from tmptable limit 1").collect()(0) - result.schema.contains(new StructField("partition", StringType, true)) - } - @Test def testWriteSmallPrecisionDecimalTable(): Unit = { val records1 = recordsToStrings(dataGen.generateInserts("001", 5)).toList diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala index ae41fa8eb551f..e3cde53951077 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala @@ -20,22 +20,30 @@ package org.apache.hudi.functional import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path} +import org.apache.hudi.ColumnStatsIndexSupport.composeIndexSchema +import org.apache.hudi.DataSourceWriteOptions.{PRECOMBINE_FIELD, RECORDKEY_FIELD} +import org.apache.hudi.HoodieConversionUtils.toProperties +import org.apache.hudi.common.config.HoodieMetadataConfig +import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} import org.apache.hudi.common.util.ParquetUtils -import org.apache.hudi.index.columnstats.ColumnStatsIndexHelper +import org.apache.hudi.config.{HoodieStorageConfig, HoodieWriteConfig} import 
org.apache.hudi.testutils.HoodieClientTestBase +import org.apache.hudi.{ColumnStatsIndexSupport, DataSourceWriteOptions} import org.apache.spark.sql._ -import org.apache.spark.sql.expressions.UserDefinedFunction import org.apache.spark.sql.functions.typedLit import org.apache.spark.sql.types._ import org.junit.jupiter.api.Assertions.{assertEquals, assertNotNull, assertTrue} -import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} +import org.junit.jupiter.api._ +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.ValueSource import java.math.BigInteger import java.sql.{Date, Timestamp} import scala.collection.JavaConverters._ import scala.util.Random -class TestColumnStatsIndex extends HoodieClientTestBase { +@Tag("functional") +class TestColumnStatsIndex extends HoodieClientTestBase with ColumnStatsIndexSupport { var spark: SparkSession = _ val sourceTableSchema = @@ -63,191 +71,102 @@ class TestColumnStatsIndex extends HoodieClientTestBase { cleanupSparkContexts() } - @Test - def testZIndexTableComposition(): Unit = { - val targetParquetTablePath = tempDir.resolve("index/zorder/input-table").toAbsolutePath.toString - val sourceJSONTablePath = getClass.getClassLoader.getResource("index/zorder/input-table-json").toString + @ParameterizedTest + @ValueSource(booleans = Array(true, false)) + def testMetadataColumnStatsIndex(forceFullLogScan: Boolean): Unit = { + val opts = Map( + "hoodie.insert.shuffle.parallelism" -> "4", + "hoodie.upsert.shuffle.parallelism" -> "4", + HoodieWriteConfig.TBL_NAME.key -> "hoodie_test", + RECORDKEY_FIELD.key -> "c1", + PRECOMBINE_FIELD.key -> "c1", + HoodieMetadataConfig.ENABLE.key -> "true", + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", + HoodieMetadataConfig.ENABLE_FULL_SCAN_LOG_FILES.key -> forceFullLogScan.toString, + HoodieTableConfig.POPULATE_META_FIELDS.key -> "true" + ) - bootstrapParquetInputTableFromJSON(sourceJSONTablePath, targetParquetTablePath) + 
setTableName("hoodie_test") + initMetaClient() - val inputDf = - // NOTE: Schema here is provided for validation that the input date is in the appropriate format - spark.read - .schema(sourceTableSchema) - .parquet(targetParquetTablePath) - - val zorderedCols = Seq("c1", "c2", "c3", "c5", "c6", "c7", "c8") - val zorderedColsSchemaFields = inputDf.schema.fields.filter(f => zorderedCols.contains(f.name)).toSeq - - // {@link TimestampType} is not supported, and will throw -- hence skipping "c4" - val newZIndexTableDf = - ColumnStatsIndexHelper.buildColumnStatsTableFor( - inputDf.sparkSession, - inputDf.inputFiles.toSeq.asJava, - zorderedColsSchemaFields.asJava - ) - - val indexSchema = - ColumnStatsIndexHelper.composeIndexSchema( - sourceTableSchema.fields.filter(f => zorderedCols.contains(f.name)).toSeq.asJava - ) - - // Collect Z-index stats manually (reading individual Parquet files) - val manualZIndexTableDf = - buildColumnStatsTableManually(targetParquetTablePath, zorderedCols, indexSchema) - - // NOTE: Z-index is built against stats collected w/in Parquet footers, which will be - // represented w/ corresponding Parquet schema (INT, INT64, INT96, etc). 
- // - // When stats are collected manually, produced Z-index table is inherently coerced into the - // schema of the original source Parquet base-file and therefore we have to similarly coerce newly - // built Z-index table (built off Parquet footers) into the canonical index schema (built off the - // original source file schema) - assertEquals(asJson(sort(manualZIndexTableDf)), asJson(sort(newZIndexTableDf))) - - // Match against expected Z-index table - val expectedZIndexTableDf = - spark.read - .schema(indexSchema) - .json(getClass.getClassLoader.getResource("index/zorder/z-index-table.json").toString) - - assertEquals(asJson(sort(expectedZIndexTableDf)), asJson(sort(replace(newZIndexTableDf)))) - } + val sourceJSONTablePath = getClass.getClassLoader.getResource("index/zorder/input-table-json").toString - @Test - def testZIndexTableMerge(): Unit = { - val testZIndexPath = new Path(basePath, "zindex") - - val firstParquetTablePath = tempDir.resolve("index/zorder/input-table").toAbsolutePath.toString - val firstJSONTablePath = getClass.getClassLoader.getResource("index/zorder/input-table-json").toString - - // Bootstrap FIRST source Parquet table - bootstrapParquetInputTableFromJSON(firstJSONTablePath, firstParquetTablePath) - - val zorderedCols = Seq("c1", "c2", "c3", "c5", "c6", "c7", "c8") - val indexSchema = - ColumnStatsIndexHelper.composeIndexSchema( - sourceTableSchema.fields.filter(f => zorderedCols.contains(f.name)).toSeq.asJava - ) - - // - // Bootstrap Z-index table - // - - val firstCommitInstance = "0" - val firstInputDf = spark.read.parquet(firstParquetTablePath) - - ColumnStatsIndexHelper.updateColumnStatsIndexFor( - firstInputDf.sparkSession, - sourceTableSchema, - firstInputDf.inputFiles.toSeq.asJava, - zorderedCols.asJava, - testZIndexPath.toString, - firstCommitInstance, - Seq().asJava - ) + // NOTE: Schema here is provided for validation that the input date is in the appropriate format + val inputDF = 
spark.read.schema(sourceTableSchema).json(sourceJSONTablePath) - // NOTE: We don't need to provide schema upon reading from Parquet, since Spark will be able - // to reliably retrieve it - val initialZIndexTable = - spark.read - .parquet(new Path(testZIndexPath, firstCommitInstance).toString) + inputDF + .sort("c1") + .repartition(4, new Column("c1")) + .write + .format("hudi") + .options(opts) + .option(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.key, 10 * 1024) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .mode(SaveMode.Overwrite) + .save(basePath) - val expectedInitialZIndexTableDf = - spark.read - .schema(indexSchema) - .json(getClass.getClassLoader.getResource("index/zorder/z-index-table.json").toString) + metaClient = HoodieTableMetaClient.reload(metaClient) - assertEquals(asJson(sort(expectedInitialZIndexTableDf)), asJson(sort(replace(initialZIndexTable)))) + val metadataConfig = HoodieMetadataConfig.newBuilder() + .fromProperties(toProperties(opts)) + .build() - // Bootstrap SECOND source Parquet table - val secondParquetTablePath = tempDir.resolve("index/zorder/another-input-table").toAbsolutePath.toString - val secondJSONTablePath = getClass.getClassLoader.getResource("index/zorder/another-input-table-json").toString + val colStatsDF = readColumnStatsIndex(spark, basePath, metadataConfig, sourceTableSchema.fieldNames) + val transposedColStatsDF = transposeColumnStatsIndex(spark, colStatsDF, sourceTableSchema.fieldNames, sourceTableSchema) - bootstrapParquetInputTableFromJSON(secondJSONTablePath, secondParquetTablePath) + val expectedColStatsSchema = composeIndexSchema(sourceTableSchema.fieldNames, sourceTableSchema) - val secondCommitInstance = "1" - val secondInputDf = + // Match against expected column stats table + val expectedColStatsIndexTableDf = spark.read - .schema(sourceTableSchema) - .parquet(secondParquetTablePath) - - // - // Update Z-index table - // - - 
ColumnStatsIndexHelper.updateColumnStatsIndexFor( - secondInputDf.sparkSession, - sourceTableSchema, - secondInputDf.inputFiles.toSeq.asJava, - zorderedCols.asJava, - testZIndexPath.toString, - secondCommitInstance, - Seq(firstCommitInstance).asJava - ) + .schema(expectedColStatsSchema) + .json(getClass.getClassLoader.getResource("index/zorder/column-stats-index-table.json").toString) - // NOTE: We don't need to provide schema upon reading from Parquet, since Spark will be able - // to reliably retrieve it - val mergedZIndexTable = - spark.read - .parquet(new Path(testZIndexPath, secondCommitInstance).toString) + assertEquals(expectedColStatsIndexTableDf.schema, transposedColStatsDF.schema) + // NOTE: We have to drop the `fileName` column as it contains semi-random components + // that we can't control in this test. Nevertheless, since we manually verify composition of the + // ColStats Index by reading Parquet footers from individual Parquet files, this is not an issue + assertEquals(asJson(sort(expectedColStatsIndexTableDf)), asJson(sort(transposedColStatsDF.drop("fileName")))) - val expectedMergedZIndexTableDf = - spark.read - .schema(indexSchema) - .json(getClass.getClassLoader.getResource("index/zorder/z-index-table-merged.json").toString) + // Collect Column Stats manually (reading individual Parquet files) + val manualColStatsTableDF = + buildColumnStatsTableManually(basePath, sourceTableSchema.fieldNames, expectedColStatsSchema) - assertEquals(asJson(sort(expectedMergedZIndexTableDf)), asJson(sort(replace(mergedZIndexTable)))) - } + assertEquals(asJson(sort(manualColStatsTableDF)), asJson(sort(transposedColStatsDF))) - @Test - def testColumnStatsTablesGarbageCollection(): Unit = { - val targetParquetTablePath = tempDir.resolve("index/zorder/input-table").toAbsolutePath.toString - val sourceJSONTablePath = getClass.getClassLoader.getResource("index/zorder/input-table-json").toString + // do an upsert and validate + val updateJSONTablePath = 
getClass.getClassLoader.getResource("index/zorder/another-input-table-json").toString + val updateDF = spark.read + .schema(sourceTableSchema) + .json(updateJSONTablePath) - bootstrapParquetInputTableFromJSON(sourceJSONTablePath, targetParquetTablePath) + updateDF.repartition(4) + .write + .format("hudi") + .options(opts) + .option(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.key, 10 * 1024) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .mode(SaveMode.Append) + .save(basePath) - val inputDf = spark.read.parquet(targetParquetTablePath) + metaClient = HoodieTableMetaClient.reload(metaClient) - val testColumnStatsIndexPath = new Path(tempDir.resolve("zindex").toAbsolutePath.toString) - val fs = testColumnStatsIndexPath.getFileSystem(spark.sparkContext.hadoopConfiguration) + val updatedColStatsDF = readColumnStatsIndex(spark, basePath, metadataConfig, sourceTableSchema.fieldNames) + val transposedUpdatedColStatsDF = transposeColumnStatsIndex(spark, updatedColStatsDF, sourceTableSchema.fieldNames, sourceTableSchema) - // Try to save statistics - ColumnStatsIndexHelper.updateColumnStatsIndexFor( - inputDf.sparkSession, - sourceTableSchema, - inputDf.inputFiles.toSeq.asJava, - Seq("c1","c2","c3","c5","c6","c7","c8").asJava, - testColumnStatsIndexPath.toString, - "2", - Seq("0", "1").asJava - ) + val expectedColStatsIndexUpdatedDF = + spark.read + .schema(expectedColStatsSchema) + .json(getClass.getClassLoader.getResource("index/zorder/updated-column-stats-index-table.json").toString) - // Save again - ColumnStatsIndexHelper.updateColumnStatsIndexFor( - inputDf.sparkSession, - sourceTableSchema, - inputDf.inputFiles.toSeq.asJava, - Seq("c1","c2","c3","c5","c6","c7","c8").asJava, - testColumnStatsIndexPath.toString, - "3", - Seq("0", "1", "2").asJava - ) + assertEquals(expectedColStatsIndexUpdatedDF.schema, transposedUpdatedColStatsDF.schema) + assertEquals(asJson(sort(expectedColStatsIndexUpdatedDF)), 
asJson(sort(transposedUpdatedColStatsDF.drop("fileName")))) - // Test old index table being cleaned up - ColumnStatsIndexHelper.updateColumnStatsIndexFor( - inputDf.sparkSession, - sourceTableSchema, - inputDf.inputFiles.toSeq.asJava, - Seq("c1","c2","c3","c5","c6","c7","c8").asJava, - testColumnStatsIndexPath.toString, - "4", - Seq("0", "1", "3").asJava - ) + // Collect Column Stats manually (reading individual Parquet files) + val manualUpdatedColStatsTableDF = + buildColumnStatsTableManually(basePath, sourceTableSchema.fieldNames, expectedColStatsSchema) - assertEquals(!fs.exists(new Path(testColumnStatsIndexPath, "2")), true) - assertEquals(!fs.exists(new Path(testColumnStatsIndexPath, "3")), true) - assertEquals(fs.exists(new Path(testColumnStatsIndexPath, "4")), true) + assertEquals(asJson(sort(manualUpdatedColStatsTableDF)), asJson(sort(transposedUpdatedColStatsDF))) } @Test @@ -289,14 +208,14 @@ class TestColumnStatsIndex extends HoodieClientTestBase { }) } - private def buildColumnStatsTableManually(tablePath: String, zorderedCols: Seq[String], indexSchema: StructType) = { + private def buildColumnStatsTableManually(tablePath: String, indexedCols: Seq[String], indexSchema: StructType) = { val files = { val it = fs.listFiles(new Path(tablePath), true) var seq = Seq[LocatedFileStatus]() while (it.hasNext) { seq = seq :+ it.next() } - seq + seq.filter(fs => fs.getPath.getName.endsWith(".parquet")) } spark.createDataFrame( @@ -304,15 +223,16 @@ class TestColumnStatsIndex extends HoodieClientTestBase { val df = spark.read.schema(sourceTableSchema).parquet(file.getPath.toString) val exprs: Seq[String] = s"'${typedLit(file.getPath.getName)}' AS file" +: + s"sum(1) AS valueCount" +: df.columns - .filter(col => zorderedCols.contains(col)) + .filter(col => indexedCols.contains(col)) .flatMap(col => { val minColName = s"${col}_minValue" val maxColName = s"${col}_maxValue" Seq( s"min($col) AS $minColName", s"max($col) AS $maxColName", - s"sum(cast(isnull($col) AS 
long)) AS ${col}_num_nulls" + s"sum(cast(isnull($col) AS long)) AS ${col}_nullCount" ) }) @@ -343,23 +263,6 @@ class TestColumnStatsIndex extends HoodieClientTestBase { fs.delete(new Path(targetParquetTablePath, "_SUCCESS"), false) } - def replace(ds: Dataset[Row]): DataFrame = { - val uuidRegexp = "[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}" - - val uuids = - ds.selectExpr(s"regexp_extract(file, '(${uuidRegexp})')") - .distinct() - .collect() - .map(_.getString(0)) - - val uuidToIdx: UserDefinedFunction = functions.udf((fileName: String) => { - val uuid = uuids.find(uuid => fileName.contains(uuid)).get - fileName.replace(uuid, "xxx") - }) - - ds.withColumn("file", uuidToIdx(ds("file"))) - } - private def generateRandomDataFrame(spark: SparkSession): DataFrame = { val sourceTableSchema = new StructType() diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLayoutOptimization.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLayoutOptimization.scala index 96728f620f370..17715627fef38 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLayoutOptimization.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLayoutOptimization.scala @@ -18,13 +18,14 @@ package org.apache.hudi.functional +import org.apache.hudi.HoodieFileIndex.DataSkippingFailureMode import org.apache.hudi.common.config.HoodieMetadataConfig import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline} import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings import org.apache.hudi.config.{HoodieClusteringConfig, HoodieWriteConfig} import org.apache.hudi.testutils.HoodieClientTestBase -import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions} +import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieFileIndex} 
import org.apache.spark.sql._ import org.apache.spark.sql.types._ import org.junit.jupiter.api.Assertions.assertEquals @@ -50,17 +51,20 @@ class TestLayoutOptimization extends HoodieClientTestBase { .add("c7", BinaryType) .add("c8", ByteType) + val metadataOpts = Map( + HoodieMetadataConfig.ENABLE.key -> "true", + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true" + ) + val commonOpts = Map( "hoodie.insert.shuffle.parallelism" -> "4", "hoodie.upsert.shuffle.parallelism" -> "4", "hoodie.bulkinsert.shuffle.parallelism" -> "4", - HoodieMetadataConfig.ENABLE.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", DataSourceWriteOptions.RECORDKEY_FIELD.key() -> "_row_key", DataSourceWriteOptions.PARTITIONPATH_FIELD.key() -> "partition", DataSourceWriteOptions.PRECOMBINE_FIELD.key() -> "timestamp", HoodieWriteConfig.TBL_NAME.key -> "hoodie_test" - ) + ) ++ metadataOpts @BeforeEach override def setUp() { @@ -92,6 +96,9 @@ class TestLayoutOptimization extends HoodieClientTestBase { val records = recordsToStrings(dataGen.generateInserts("001", targetRecordsCount)).toList val writeDf: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records, 2)) + // If there are any failures in the Data Skipping flow, test should fail + spark.sqlContext.setConf(DataSkippingFailureMode.configName, DataSkippingFailureMode.Strict.value); + writeDf.write.format("org.apache.hudi") .options(commonOpts) .option("hoodie.compact.inline", "false") @@ -130,6 +137,7 @@ class TestLayoutOptimization extends HoodieClientTestBase { val readDfSkip = spark.read .option(DataSourceReadOptions.ENABLE_DATA_SKIPPING.key(), "true") + .options(metadataOpts) .format("hudi") .load(basePath) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala index 5c20939cfb532..d8ebe5cbcd8b0 100644 --- 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala @@ -517,17 +517,14 @@ class TestMORDataSource extends HoodieClientTestBase { checkAnswer((1, "a0", 12, 101, false)) writeData((1, "a0", 16, 97, true)) - // Ordering value will not be honored for a delete record as the payload is sent as empty payload - checkAnswer((1, "a0", 16, 97, true)) + // Ordering value will be honored, the delete record is considered as obsolete + // because it has smaller version number (97 < 101) + checkAnswer((1, "a0", 12, 101, false)) writeData((1, "a0", 18, 96, false)) - // Ideally, once a record is deleted, preCombine does not kick. So, any new record will be considered valid ignoring - // ordering val. But what happens ini hudi is, all records in log files are reconciled and then merged with base - // file. After reconciling all records from log files, it results in (1, "a0", 18, 96, false) and ths is merged with - // (1, "a0", 10, 100, false) in base file and hence we see (1, "a0", 10, 100, false) as it has higher preComine value. - // the result might differ depending on whether compaction was triggered or not(after record is deleted). In this - // test, no compaction is triggered and hence we see the record from base file. 
- checkAnswer((1, "a0", 10, 100, false)) + // Ordering value will be honored, the data record is considered as obsolete + // because it has smaller version number (96 < 101) + checkAnswer((1, "a0", 12, 101, false)) } private def writeData(data: (Int, String, Int, Int, Boolean)): Unit = { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala index 918202e974682..11705f9eb1aa7 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala @@ -49,17 +49,20 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn def testReadability(): Unit = { val dataGen = new HoodieTestDataGenerator() - val opts: Map[String, String] = commonOpts ++ Map( + val metadataOpts: Map[String, String] = Map( HoodieMetadataConfig.ENABLE.key -> "true", - HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key -> "1" + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true" ) + val combinedOpts: Map[String, String] = commonOpts ++ metadataOpts ++ + Map(HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key -> "1") + // Insert records val newRecords = dataGen.generateInserts("001", 100) val newRecordsDF = parseRecords(recordsToStrings(newRecords).asScala) newRecordsDF.write.format(hudi) - .options(opts) + .options(combinedOpts) .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -69,27 +72,34 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn val updatedRecordsDF = parseRecords(recordsToStrings(updatedRecords).asScala) updatedRecordsDF.write.format(hudi) - 
.options(opts) + .options(combinedOpts) .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) - val metadataDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata") + // Files partition of MT + val filesPartitionDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/files") // Smoke test - metadataDF.show() + filesPartitionDF.show() // Query w/ 0 requested columns should be working fine - assertEquals(4, metadataDF.count()) + assertEquals(4, filesPartitionDF.count()) val expectedKeys = Seq("2015/03/16", "2015/03/17", "2016/03/15", "__all_partitions__") - val keys = metadataDF.select("key") + val keys = filesPartitionDF.select("key") .collect() .map(_.getString(0)) .toSeq .sorted assertEquals(expectedKeys, keys) + + // Column Stats Index partition of MT + val colStatsDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/column_stats") + + // Smoke test + colStatsDF.show() } private def parseRecords(records: Seq[String]) = { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala index ca5f79191a729..2cdd7880bfec8 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala @@ -18,22 +18,21 @@ package org.apache.hudi.functional import org.apache.avro.Schema -import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, DefaultSource, HoodieBaseRelation, HoodieSparkUtils, HoodieUnsafeRDD} import org.apache.hudi.common.config.HoodieMetadataConfig -import org.apache.hudi.common.model.HoodieRecord +import org.apache.hudi.common.model.{HoodieRecord, 
OverwriteNonDefaultsWithLatestAvroPayload, OverwriteWithLatestAvroPayload} +import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.common.testutils.{HadoopMapRedUtils, HoodieTestDataGenerator} import org.apache.hudi.config.{HoodieStorageConfig, HoodieWriteConfig} import org.apache.hudi.keygen.NonpartitionedKeyGenerator import org.apache.hudi.testutils.SparkClientFunctionalTestHarness +import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, DefaultSource, HoodieBaseRelation, HoodieSparkUtils, HoodieUnsafeRDD} import org.apache.parquet.hadoop.util.counters.BenchmarkCounter -import org.apache.spark.HoodieUnsafeRDDUtils import org.apache.spark.internal.Logging -import org.apache.spark.sql.{Dataset, Row, SaveMode} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.{Dataset, HoodieUnsafeRDDUtils, Row, SaveMode} import org.junit.jupiter.api.Assertions.{assertEquals, fail} import org.junit.jupiter.api.{Tag, Test} -import scala.:+ import scala.collection.JavaConverters._ @Tag("functional") @@ -67,14 +66,14 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with val projectedColumnsReadStats: Array[(String, Long)] = if (HoodieSparkUtils.isSpark3) Array( - ("rider", 2452), - ("rider,driver", 2552), - ("rider,driver,tip_history", 3517)) + ("rider", 2363), + ("rider,driver", 2463), + ("rider,driver,tip_history", 3428)) else if (HoodieSparkUtils.isSpark2) Array( - ("rider", 2595), - ("rider,driver", 2735), - ("rider,driver,tip_history", 3750)) + ("rider", 2474), + ("rider,driver", 2614), + ("rider,driver,tip_history", 3629)) else fail("Only Spark 3 and Spark 2 are currently supported") @@ -107,31 +106,30 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with else fail("Only Spark 3 and Spark 2 are currently supported") - // Stats for the reads fetching _all_ columns (note, how amount of bytes read - // is invariant of the # of columns) - val 
fullColumnsReadStats: Array[(String, Long)] = + // Test MOR / Snapshot / Skip-merge + runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL, projectedColumnsReadStats) + + // Test MOR / Snapshot / Payload-combine + runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL, projectedColumnsReadStats) + + // Stats for the reads fetching only _projected_ columns (note how amount of bytes read + // increases along w/ the # of columns) in Read Optimized mode (which is essentially equivalent to COW) + val projectedColumnsReadStatsReadOptimized: Array[(String, Long)] = if (HoodieSparkUtils.isSpark3) Array( - ("rider", 14166), - ("rider,driver", 14166), - ("rider,driver,tip_history", 14166)) + ("rider", 2363), + ("rider,driver", 2463), + ("rider,driver,tip_history", 3428)) else if (HoodieSparkUtils.isSpark2) - // TODO re-enable tests (these tests are very unstable currently) Array( - ("rider", -1), - ("rider,driver", -1), - ("rider,driver,tip_history", -1)) + ("rider", 2474), + ("rider,driver", 2614), + ("rider,driver,tip_history", 3629)) else fail("Only Spark 3 and Spark 2 are currently supported") - // Test MOR / Snapshot / Skip-merge - runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL, projectedColumnsReadStats) - - // Test MOR / Snapshot / Payload-combine - runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL, fullColumnsReadStats) - // Test MOR / Read Optimized - runTest(tableState, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, "null", projectedColumnsReadStats) + runTest(tableState, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, "null", projectedColumnsReadStatsReadOptimized) } @Test @@ -163,17 +161,76 @@ class TestParquetColumnProjection extends 
SparkClientFunctionalTestHarness with else fail("Only Spark 3 and Spark 2 are currently supported") - // Stats for the reads fetching _all_ columns (currently for MOR to be able to merge - // records properly full row has to be fetched; note, how amount of bytes read + // Test MOR / Snapshot / Skip-merge + runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL, projectedColumnsReadStats) + + // Test MOR / Snapshot / Payload-combine + runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL, projectedColumnsReadStats) + + // Stats for the reads fetching only _projected_ columns (note how amount of bytes read + // increases along w/ the # of columns) in Read Optimized mode (which is essentially equivalent to COW) + val projectedColumnsReadStatsReadOptimized: Array[(String, Long)] = + if (HoodieSparkUtils.isSpark3) + Array( + ("rider", 2363), + ("rider,driver", 2463), + ("rider,driver,tip_history", 3428)) + else if (HoodieSparkUtils.isSpark2) + Array( + ("rider", 2474), + ("rider,driver", 2614), + ("rider,driver,tip_history", 3629)) + else + fail("Only Spark 3 and Spark 2 are currently supported") + + // Test MOR / Read Optimized + runTest(tableState, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, "null", projectedColumnsReadStatsReadOptimized) + } + + @Test + def testMergeOnReadSnapshotRelationWithDeltaLogsFallback(): Unit = { + val tablePath = s"$basePath/mor-with-logs-fallback" + val targetRecordsCount = 100 + val targetUpdatedRecordsRatio = 0.5 + + // NOTE: This test validates MOR Snapshot Relation falling back to read "whole" row from MOR table (as + // opposed to only required columns) in following cases + // - Non-standard Record Payload is used: such Payload might rely on the fields that are not + // being queried by the Spark, and we currently have no way figuring out what these fields are, therefore + // we fallback 
to read whole row + val overriddenOpts = defaultWriteOpts ++ Map( + HoodieWriteConfig.WRITE_PAYLOAD_CLASS_NAME.key -> classOf[OverwriteNonDefaultsWithLatestAvroPayload].getName + ) + + val (_, schema) = bootstrapMORTable(tablePath, targetRecordsCount, targetUpdatedRecordsRatio, overriddenOpts, populateMetaFields = true) + val tableState = TableState(tablePath, schema, targetRecordsCount, targetUpdatedRecordsRatio) + + // Stats for the reads fetching only _projected_ columns (note how amount of bytes read + // increases along w/ the # of columns) + val projectedColumnsReadStats: Array[(String, Long)] = + if (HoodieSparkUtils.isSpark3) + Array( + ("rider", 2452), + ("rider,driver", 2552), + ("rider,driver,tip_history", 3517)) + else if (HoodieSparkUtils.isSpark2) + Array( + ("rider", 2595), + ("rider,driver", 2735), + ("rider,driver,tip_history", 3750)) + else + fail("Only Spark 3 and Spark 2 are currently supported") + + // Stats for the reads fetching _all_ columns (note, how amount of bytes read // is invariant of the # of columns) val fullColumnsReadStats: Array[(String, Long)] = if (HoodieSparkUtils.isSpark3) Array( - ("rider", 14166), - ("rider,driver", 14166), - ("rider,driver,tip_history", 14166)) + ("rider", 14167), + ("rider,driver", 14167), + ("rider,driver,tip_history", 14167)) else if (HoodieSparkUtils.isSpark2) - // TODO re-enable tests (these tests are very unstable currently) + // TODO re-enable tests (these tests are very unstable currently) Array( ("rider", -1), ("rider,driver", -1), @@ -184,11 +241,8 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with // Test MOR / Snapshot / Skip-merge runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL, projectedColumnsReadStats) - // Test MOR / Snapshot / Payload-combine + // Test MOR / Snapshot / Payload-combine (using non-standard Record Payload) runTest(tableState, 
DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL, fullColumnsReadStats) - - // Test MOR / Read Optimized - runTest(tableState, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, "null", projectedColumnsReadStats) } // TODO add test for incremental query of the table with logs @@ -222,23 +276,6 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with else fail("Only Spark 3 and Spark 2 are currently supported") - // Stats for the reads fetching _all_ columns (note, how amount of bytes read - // is invariant of the # of columns) - val fullColumnsReadStats: Array[(String, Long)] = - if (HoodieSparkUtils.isSpark3) - Array( - ("rider", 19684), - ("rider,driver", 19684), - ("rider,driver,tip_history", 19684)) - else if (HoodieSparkUtils.isSpark2) - // TODO re-enable tests (these tests are very unstable currently) - Array( - ("rider", -1), - ("rider,driver", -1), - ("rider,driver,tip_history", -1)) - else - fail("Only Spark 3 and Spark 2 are currently supported") - val incrementalOpts: Map[String, String] = Map( DataSourceReadOptions.BEGIN_INSTANTTIME.key -> "001" ) @@ -249,10 +286,9 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with // Test MOR / Incremental / Payload-combine runTest(tableState, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL, DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL, - fullColumnsReadStats, incrementalOpts) + projectedColumnsReadStats, incrementalOpts) } - // Test routine private def runTest(tableState: TableState, queryType: String, @@ -294,7 +330,7 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with } val readColumns = targetColumns ++ relation.mandatoryColumns - val (_, projectedStructType) = HoodieSparkUtils.getRequiredSchema(tableState.schema, readColumns) + val (_, projectedStructType, _) = HoodieSparkUtils.getRequiredSchema(tableState.schema, readColumns) val row: InternalRow = 
rows.take(1).head @@ -322,6 +358,7 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with inputDF.write.format("org.apache.hudi") .options(opts) + .option(HoodieTableConfig.POPULATE_META_FIELDS.key, populateMetaFields.toString) .option(DataSourceWriteOptions.TABLE_TYPE.key, tableType) .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Overwrite) @@ -354,6 +391,7 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with inputDF.write.format("org.apache.hudi") .options(opts) .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .option(HoodieTableConfig.POPULATE_META_FIELDS.key, populateMetaFields.toString) .mode(SaveMode.Append) .save(path) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/hudi/benchmark/HoodieBenchmark.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/hudi/benchmark/HoodieBenchmark.scala new file mode 100644 index 0000000000000..6d4317a8135e1 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/hudi/benchmark/HoodieBenchmark.scala @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.hudi.benchmark + + +import java.io.{OutputStream, PrintStream} + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer +import scala.concurrent.duration._ +import scala.util.Try + +import org.apache.commons.io.output.TeeOutputStream +import org.apache.commons.lang3.SystemUtils + +import org.apache.spark.util.Utils + +/** + * Reference from spark. + * Utility class to benchmark components. An example of how to use this is: + * val benchmark = new Benchmark("My Benchmark", valuesPerIteration) + * benchmark.addCase("V1")() + * benchmark.addCase("V2")() + * benchmark.run + * This will output the average time to run each function and the rate of each function. + * + * The benchmark function takes one argument that is the iteration that's being run. + * + * @param name name of this benchmark. + * @param valuesPerIteration number of values used in the test case, used to compute rows/s. + * @param minNumIters the min number of iterations that will be run per case, not counting warm-up. + * @param warmupTime amount of time to spend running dummy case iterations for JIT warm-up. + * @param minTime further iterations will be run for each case until this time is used up. + * @param outputPerIteration if true, the timing for each run will be printed to stdout. 
+ * @param output optional output stream to write benchmark results to + */ +class HoodieBenchmark( + name: String, + valuesPerIteration: Long, + minNumIters: Int = 2, + warmupTime: FiniteDuration = 2.seconds, + minTime: FiniteDuration = 2.seconds, + outputPerIteration: Boolean = false, + output: Option[OutputStream] = None) { + import HoodieBenchmark._ + val benchmarks = mutable.ArrayBuffer.empty[HoodieBenchmark.Case] + + val out = if (output.isDefined) { + new PrintStream(new TeeOutputStream(System.out, output.get)) + } else { + System.out + } + + /** + * Adds a case to run when run() is called. The given function will be run for several + * iterations to collect timing statistics. + * + * @param name of the benchmark case + * @param numIters if non-zero, forces exactly this many iterations to be run + */ + def addCase(name: String, numIters: Int = 0)(f: Int => Unit): Unit = { + addTimerCase(name, numIters) { timer => + timer.startTiming() + f(timer.iteration) + timer.stopTiming() + } + } + + /** + * Adds a case with manual timing control. When the function is run, timing does not start + * until timer.startTiming() is called within the given function. The corresponding + * timer.stopTiming() method must be called before the function returns. + * + * @param name of the benchmark case + * @param numIters if non-zero, forces exactly this many iterations to be run + */ + def addTimerCase(name: String, numIters: Int = 0)(f: HoodieBenchmark.Timer => Unit): Unit = { + benchmarks += HoodieBenchmark.Case(name, f, numIters) + } + + /** + * Runs the benchmark and outputs the results to stdout. This should be copied and added as + * a comment with the benchmark. Although the results vary from machine to machine, it should + * provide some baseline. 
+ */ + def run(): Unit = { + require(benchmarks.nonEmpty) + // scalastyle:off + println("Running benchmark: " + name) + + val results = benchmarks.map { c => + println(" Running case: " + c.name) + measure(valuesPerIteration, c.numIters)(c.fn) + } + println + + val firstBest = results.head.bestMs + // The results are going to be processor specific so it is useful to include that. + out.println(HoodieBenchmark.getJVMOSInfo()) + out.println(HoodieBenchmark.getProcessorName()) + val nameLen = Math.max(40, Math.max(name.length, benchmarks.map(_.name.length).max)) + out.printf(s"%-${nameLen}s %14s %14s %11s %12s %13s %10s\n", + name + ":", "Best Time(ms)", "Avg Time(ms)", "Stdev(ms)", "Rate(M/s)", "Per Row(ns)", "Relative") + out.println("-" * (nameLen + 80)) + results.zip(benchmarks).foreach { case (result, benchmark) => + out.printf(s"%-${nameLen}s %14s %14s %11s %12s %13s %10s\n", + benchmark.name, + "%5.0f" format result.bestMs, + "%4.0f" format result.avgMs, + "%5.0f" format result.stdevMs, + "%10.1f" format result.bestRate, + "%6.1f" format (1000 / result.bestRate), + "%3.1fX" format (firstBest / result.bestMs)) + } + out.println + // scalastyle:on + } + + /** + * Runs a single function `f` for iters, returning the average time the function took and + * the rate of the function. 
+ */ + def measure(num: Long, overrideNumIters: Int)(f: Timer => Unit): Result = { + System.gc() // ensures garbage from previous cases don't impact this one + val warmupDeadline = warmupTime.fromNow + while (!warmupDeadline.isOverdue) { + f(new HoodieBenchmark.Timer(-1)) + } + val minIters = if (overrideNumIters != 0) overrideNumIters else minNumIters + val minDuration = if (overrideNumIters != 0) 0 else minTime.toNanos + val runTimes = ArrayBuffer[Long]() + var totalTime = 0L + var i = 0 + while (i < minIters || totalTime < minDuration) { + val timer = new HoodieBenchmark.Timer(i) + f(timer) + val runTime = timer.totalTime() + runTimes += runTime + totalTime += runTime + + if (outputPerIteration) { + // scalastyle:off + println(s"Iteration $i took ${NANOSECONDS.toMicros(runTime)} microseconds") + // scalastyle:on + } + i += 1 + } + // scalastyle:off + println(s" Stopped after $i iterations, ${NANOSECONDS.toMillis(runTimes.sum)} ms") + // scalastyle:on + assert(runTimes.nonEmpty) + val best = runTimes.min + val avg = runTimes.sum / runTimes.size + val stdev = if (runTimes.size > 1) { + math.sqrt(runTimes.map(time => (time - avg) * (time - avg)).sum / (runTimes.size - 1)) + } else 0 + Result(avg / 1000000.0, num / (best / 1000.0), best / 1000000.0, stdev / 1000000.0) + } +} + +object HoodieBenchmark { + + /** + * Object available to benchmark code to control timing e.g. to exclude set-up time. 
+ * + * @param iteration specifies this is the nth iteration of running the benchmark case + */ + class Timer(val iteration: Int) { + private var accumulatedTime: Long = 0L + private var timeStart: Long = 0L + + def startTiming(): Unit = { + assert(timeStart == 0L, "Already started timing.") + timeStart = System.nanoTime + } + + def stopTiming(): Unit = { + assert(timeStart != 0L, "Have not started timing.") + accumulatedTime += System.nanoTime - timeStart + timeStart = 0L + } + + def totalTime(): Long = { + assert(timeStart == 0L, "Have not stopped timing.") + accumulatedTime + } + } + + case class Case(name: String, fn: Timer => Unit, numIters: Int) + case class Result(avgMs: Double, bestRate: Double, bestMs: Double, stdevMs: Double) + + /** + * This should return a user helpful processor information. Getting at this depends on the OS. + * This should return something like "Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz" + */ + def getProcessorName(): String = { + val cpu = if (SystemUtils.IS_OS_MAC_OSX) { + Utils.executeAndGetOutput(Seq("/usr/sbin/sysctl", "-n", "machdep.cpu.brand_string")) + .stripLineEnd + } else if (SystemUtils.IS_OS_LINUX) { + Try { + val grepPath = Utils.executeAndGetOutput(Seq("which", "grep")).stripLineEnd + Utils.executeAndGetOutput(Seq(grepPath, "-m", "1", "model name", "/proc/cpuinfo")) + .stripLineEnd.replaceFirst("model name[\\s*]:[\\s*]", "") + }.getOrElse("Unknown processor") + } else { + System.getenv("PROCESSOR_IDENTIFIER") + } + cpu + } + + /** + * This should return a user helpful JVM & OS information. 
+ * This should return something like + * "OpenJDK 64-Bit Server VM 1.8.0_65-b17 on Linux 4.1.13-100.fc21.x86_64" + */ + def getJVMOSInfo(): String = { + val vmName = System.getProperty("java.vm.name") + val runtimeVersion = System.getProperty("java.runtime.version") + val osName = System.getProperty("os.name") + val osVersion = System.getProperty("os.version") + s"${vmName} ${runtimeVersion} on ${osName} ${osVersion}" + } +} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/hudi/benchmark/HoodieBenchmarkBase.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/hudi/benchmark/HoodieBenchmarkBase.scala new file mode 100644 index 0000000000000..b6389a0614726 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/hudi/benchmark/HoodieBenchmarkBase.scala @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.hudi.benchmark + +import java.io.{File, FileOutputStream, OutputStream} + +import org.apache.spark.util.Utils + +/** + * Reference from spark. + * A base class for generate benchmark results to a file. 
+ * For JDK9+, JDK major version number is added to the file names to distinguish the results. + */ +abstract class HoodieBenchmarkBase { + var output: Option[OutputStream] = None + + /** + * Main process of the whole benchmark. + * Implementations of this method are supposed to use the wrapper method `runBenchmark` + * for each benchmark scenario. + */ + def runBenchmarkSuite(mainArgs: Array[String]): Unit + + final def runBenchmark(benchmarkName: String)(func: => Any): Unit = { + val separator = "=" * 96 + val testHeader = (separator + '\n' + benchmarkName + '\n' + separator + '\n' + '\n').getBytes + output.foreach(_.write(testHeader)) + func + output.foreach(_.write('\n')) + } + + def main(args: Array[String]): Unit = { + // turning this on so the behavior between running benchmark via `spark-submit` or SBT will + // be consistent, also allow users to turn on/off certain behavior such as + // `spark.sql.codegen.factoryMode` + val regenerateBenchmarkFiles: Boolean = System.getenv("SPARK_GENERATE_BENCHMARK_FILES") == "1" + if (regenerateBenchmarkFiles) { + val version = System.getProperty("java.version").split("\\D+")(0).toInt + val jdkString = if (version > 8) s"-jdk$version" else "" + val resultFileName = + s"${this.getClass.getSimpleName.replace("$", "")}jdkStringsuffix-results.txt" + val prefix = HoodieBenchmarks.currentProjectRoot.map(_ + "/").getOrElse("") + val dir = new File(s"${prefix}benchmarks/") + if (!dir.exists()) { + // scalastyle:off println + println(s"Creating ${dir.getAbsolutePath} for benchmark results.") + // scalastyle:on println + dir.mkdirs() + } + val file = new File(dir, resultFileName) + if (!file.exists()) { + file.createNewFile() + } + output = Some(new FileOutputStream(file)) + } + + runBenchmarkSuite(args) + + output.foreach { o => + if (o != null) { + o.close() + } + } + + afterAll() + } + + def suffix: String = "" + + /** + * Any shutdown code to ensure a clean shutdown + */ + def afterAll(): Unit = {} + + protected def 
withTempDir(f: File => Unit): Unit = { + val tempDir = Utils.createTempDir() + try f(tempDir) finally { + Utils.deleteRecursively(tempDir) + } + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/hudi/benchmark/HoodieBenchmarks.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/hudi/benchmark/HoodieBenchmarks.scala new file mode 100644 index 0000000000000..872991002f8b1 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/hudi/benchmark/HoodieBenchmarks.scala @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.hudi.benchmark + +import java.io.File +import java.lang.reflect.Modifier +import java.nio.file.{FileSystems, Paths} +import java.util.Locale +import scala.collection.JavaConverters._ +import scala.util.Try +import org.apache.hbase.thirdparty.com.google.common.reflect.ClassPath + +/** + * Reference from spark. + * Run all benchmarks. To run this benchmark, you should build Spark with either Maven or SBT. + * After that, you can run as below: + * + * {{{ + * 1. with spark-submit + * bin/spark-submit --class + * --jars , + * + * 2. 
generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 bin/spark-submit --class + * --jars , + * + * Results will be written to all corresponding files under "benchmarks/". + * Notice that it detects the sub-project's directories from jar's paths so the provided jars + * should be properly placed under target (Maven build) or target/scala-* (SBT) when you + * generate the files. + * }}} + * + * You can use a command as below to find all the test jars. + * Make sure to do not select duplicated jars created by different versions of builds or tools. + * {{{ + * find . -name '*-SNAPSHOT-tests.jar' | paste -sd ',' - + * }}} + * + * The example below runs all benchmarks and generates the results: + * {{{ + * SPARK_GENERATE_BENCHMARK_FILES=1 bin/spark-submit --class \ + * org.apache.spark.benchmark.Benchmarks --jars \ + * "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`" \ + * "`find . -name 'spark-core*-SNAPSHOT-tests.jar'`" \ + * "*" + * }}} + * + * The example below runs all benchmarks under "org.apache.spark.sql.execution.datasources" + * {{{ + * bin/spark-submit --class \ + * org.apache.spark.benchmark.Benchmarks --jars \ + * "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`" \ + * "`find . 
-name 'spark-core*-SNAPSHOT-tests.jar'`" \ + * "org.apache.spark.sql.execution.datasources.*" + * }}} + */ + +object HoodieBenchmarks { + var currentProjectRoot: Option[String] = None + + def main(args: Array[String]): Unit = { + val isFailFast = sys.env.get( + "SPARK_BENCHMARK_FAILFAST").map(_.toLowerCase(Locale.ROOT).trim.toBoolean).getOrElse(true) + val numOfSplits = sys.env.get( + "SPARK_BENCHMARK_NUM_SPLITS").map(_.toLowerCase(Locale.ROOT).trim.toInt).getOrElse(1) + val currentSplit = sys.env.get( + "SPARK_BENCHMARK_CUR_SPLIT").map(_.toLowerCase(Locale.ROOT).trim.toInt - 1).getOrElse(0) + var numBenchmark = 0 + + var isBenchmarkFound = false + val benchmarkClasses = ClassPath.from( + Thread.currentThread.getContextClassLoader + ).getTopLevelClassesRecursive("org.apache.spark").asScala.toArray + val matcher = FileSystems.getDefault.getPathMatcher(s"glob:${args.head}") + + benchmarkClasses.foreach { info => + lazy val clazz = info.load + lazy val runBenchmark = clazz.getMethod("main", classOf[Array[String]]) + // isAssignableFrom seems not working with the reflected class from Guava's + // getTopLevelClassesRecursive. + require(args.length > 0, "Benchmark class to run should be specified.") + if ( + info.getName.endsWith("Benchmark") && + // TODO(SPARK-34927): Support TPCDSQueryBenchmark in Benchmarks + !info.getName.endsWith("TPCDSQueryBenchmark") && + matcher.matches(Paths.get(info.getName)) && + Try(runBenchmark).isSuccess && // Does this has a main method? + !Modifier.isAbstract(clazz.getModifiers) // Is this a regular class? + ) { + numBenchmark += 1 + if (numBenchmark % numOfSplits == currentSplit) { + isBenchmarkFound = true + + val targetDirOrProjDir = + new File(clazz.getProtectionDomain.getCodeSource.getLocation.toURI) + .getParentFile.getParentFile + + // The root path to be referred in each benchmark. 
+ currentProjectRoot = Some { + if (targetDirOrProjDir.getName == "target") { + // SBT build + targetDirOrProjDir.getParentFile.getCanonicalPath + } else { + // Maven build + targetDirOrProjDir.getCanonicalPath + } + } + + // scalastyle:off println + println(s"Running ${clazz.getName}:") + // scalastyle:on println + // Force GC to minimize the side effect. + System.gc() + try { + runBenchmark.invoke(null, args.tail.toArray) + } catch { + case e: Throwable if !isFailFast => + // scalastyle:off println + println(s"${clazz.getName} failed with the exception below:") + // scalastyle:on println + e.printStackTrace() + } + } + } + } + + if (!isBenchmarkFound) throw new RuntimeException("No benchmark found to run.") + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroSerDerBenchmark.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroSerDerBenchmark.scala new file mode 100644 index 0000000000000..5e092bdb51c36 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroSerDerBenchmark.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.avro.generic.GenericRecord +import org.apache.hudi.{AvroConversionUtils, HoodieSparkUtils} +import org.apache.spark.hudi.benchmark.{HoodieBenchmark, HoodieBenchmarkBase} +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.{DataFrame, SparkSession} + +/** + * Benchmark to measure Avro SerDer performance. + */ +object AvroSerDerBenchmark extends HoodieBenchmarkBase { + protected val spark: SparkSession = getSparkSession + + def getSparkSession: SparkSession = SparkSession + .builder() + .master("local[1]") + .config("spark.driver.memory", "8G") + .appName(this.getClass.getCanonicalName) + .getOrCreate() + + def getDataFrame(numbers: Long): DataFrame = { + spark.range(0, numbers).toDF("id") + .withColumn("c1", lit("AvroSerDerBenchmark")) + .withColumn("c2", lit(12.99d)) + .withColumn("c3", lit(1)) + } + + /** + * Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Windows 10 10.0 + * Intel64 Family 6 Model 94 Stepping 3, GenuineIntel + * perf avro serializer for hoodie: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative + * ------------------------------------------------------------------------------------------------------------------------ + * serialize internalRow to avro Record 6391 6683 413 7.8 127.8 1.0X + */ + private def avroSerializerBenchmark: Unit = { + val benchmark = new HoodieBenchmark(s"perf avro serializer for hoodie", 50000000) + benchmark.addCase("serialize internalRow to avro Record") { _ => + val df = getDataFrame(50000000) + val avroSchema = AvroConversionUtils.convertStructTypeToAvroSchema(df.schema, "record", "my") + spark.sparkContext.getConf.registerAvroSchemas(avroSchema) + HoodieSparkUtils.createRdd(df,"record", "my", Some(avroSchema)).foreach(f => f) + } + benchmark.run() + } + + /** + * Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Windows 10 10.0 + * Intel64 Family 6 Model 94 Stepping 3, GenuineIntel + * perf avro 
deserializer for hoodie: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative + * ------------------------------------------------------------------------------------------------------------------------ + * deserialize avro Record to internalRow 1340 1360 27 7.5 134.0 1.0X + */ + private def avroDeserializerBenchmark: Unit = { + val benchmark = new HoodieBenchmark(s"perf avro deserializer for hoodie", 10000000) + val df = getDataFrame(10000000) + val sparkSchema = df.schema + val avroSchema = AvroConversionUtils.convertStructTypeToAvroSchema(df.schema, "record", "my") + val testRdd = HoodieSparkUtils.createRdd(df,"record", "my", Some(avroSchema)) + testRdd.cache() + testRdd.foreach(f => f) + spark.sparkContext.getConf.registerAvroSchemas(avroSchema) + benchmark.addCase("deserialize avro Record to internalRow") { _ => + testRdd.mapPartitions { iter => + val schema = AvroConversionUtils.convertStructTypeToAvroSchema(sparkSchema, "record", "my") + val avroToRowConverter = AvroConversionUtils.createAvroToInternalRowConverter(schema, sparkSchema) + iter.map(record => avroToRowConverter.apply(record.asInstanceOf[GenericRecord]).get) + }.foreach(f => f) + } + benchmark.run() + } + + override def afterAll(): Unit = { + spark.stop() + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + avroSerializerBenchmark + avroDeserializerBenchmark + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/CowTableReadBenchmark.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/CowTableReadBenchmark.scala new file mode 100644 index 0000000000000..ef926658ad652 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/CowTableReadBenchmark.scala @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.hudi.{HoodieFileIndex, HoodieSparkUtils} +import org.apache.spark.SparkConf +import org.apache.spark.hudi.benchmark.{HoodieBenchmark, HoodieBenchmarkBase} +import org.apache.spark.sql.{DataFrame, RowFactory, SparkSession} +import org.apache.spark.sql.hudi.HoodieSparkSessionExtension +import org.apache.spark.sql.types._ +import java.sql.{Date, Timestamp} + +import org.apache.hadoop.fs.Path + +import scala.util.Random + +object CowTableReadBenchmark extends HoodieBenchmarkBase { + + protected val spark: SparkSession = getSparkSession + + def getSparkSession: SparkSession = SparkSession.builder() + .master("local[4]") + .appName(this.getClass.getCanonicalName) + .withExtensions(new HoodieSparkSessionExtension) + .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config("hoodie.insert.shuffle.parallelism", "2") + .config("hoodie.upsert.shuffle.parallelism", "2") + .config("hoodie.delete.shuffle.parallelism", "2") + .config("spark.sql.session.timeZone", "CTT") + .config(sparkConf()) + .getOrCreate() + + def sparkConf(): SparkConf = { + val sparkConf = new SparkConf() + if (HoodieSparkUtils.gteqSpark3_2) { + sparkConf.set("spark.sql.catalog.spark_catalog", + 
"org.apache.spark.sql.hudi.catalog.HoodieCatalog") + } + sparkConf + } + + def prepareHoodieCowTable(tableName: String, tablePath: String) = { + createDataFrame(10000000).registerTempTable("ds") + spark.sql( + s""" + |create table $tableName using hudi + |tblproperties(primaryKey = 'c1') + |location '${tablePath}' + |As + |select * from ds + """.stripMargin) + } + + private def createDataFrame(number: Int): DataFrame = { + val schema = new StructType() + .add("c1", IntegerType) + .add("c11", IntegerType) + .add("c12", IntegerType) + .add("c2", StringType) + .add("c3", DecimalType(38, 10)) + .add("c4", TimestampType) + .add("c5", ShortType) + .add("c6", DateType) + .add("c7", BinaryType) + .add("c9", ByteType) + + val rdd = spark.sparkContext.parallelize(0 to number, 2).map { item => + val c1 = Integer.valueOf(item) + val c11 = Integer.valueOf(Random.nextInt(10000)) + val c12 = Integer.valueOf(Random.nextInt(10000)) + val c2 = s" ${item}abc" + val c3 = new java.math.BigDecimal(s"${Random.nextInt(1000)}.${Random.nextInt(100)}") + val c4 = new Timestamp(System.currentTimeMillis()) + val c5 = java.lang.Short.valueOf(s"${16}") + val c6 = Date.valueOf(s"${2020}-${item % 11 + 1}-${item % 28 + 1}") + val c7 = Array(item).map(_.toByte) + val c8 = java.lang.Byte.valueOf("9") + RowFactory.create(c1, c11, c12, c2, c3, c4, c5, c6, c7, c8) + } + spark.createDataFrame(rdd, schema) + } + + def withTempTable(tableNames: String*)(f: => Unit): Unit = { + try f finally tableNames.foreach(spark.catalog.dropTempView) + } + + /** + * Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Windows 10 10.0 + * Intel64 Family 6 Model 94 Stepping 3, GenuineIntel + * perf cow snapshot read: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative + * ------------------------------------------------------------------------------------------------------------------------ + * vectorized disable 2178 2180 2 4.6 217.8 1.0X + * vectorized enable 659 674 24 15.2 65.9 3.3X + */ + private def 
cowTableReadBenchmark(tableName: String = "cowBenchmark"): Unit = { + withTempDir {f => + withTempTable(tableName) { + prepareHoodieCowTable(tableName, new Path(f.getCanonicalPath, tableName).toUri.toString) + val benchmark = new HoodieBenchmark("perf cow snapshot read", 10000000) + benchmark.addCase("vectorized disable") { _ => + spark.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") + spark.sql(s"select c1, c3, c4, c5 from $tableName").count() + } + benchmark.addCase("vectorized enable") { _ => + spark.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true") + spark.sql(s"select c1, c3, c4, c5 from $tableName").count() + } + benchmark.run() + } + } + } + + override def afterAll(): Unit = { + spark.stop() + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + cowTableReadBenchmark() + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchMark.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchmark.scala similarity index 94% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchMark.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchmark.scala index 0b0599fb2027c..d84fad4f2493c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchMark.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/execution/benchmark/SpaceCurveOptimizeBenchmark.scala @@ -19,28 +19,28 @@ package org.apache.spark.sql.execution.benchmark import org.apache.hadoop.fs.Path +import org.apache.hudi.ColumnStatsIndexHelper.buildColumnStatsTableFor import org.apache.hudi.config.HoodieClusteringConfig.LayoutOptimizationStrategy -import 
org.apache.hudi.index.columnstats.ColumnStatsIndexHelper import org.apache.hudi.sort.SpaceCurveSortingHelper import org.apache.spark.sql.DataFrame import org.apache.spark.sql.hudi.TestHoodieSqlBase import org.apache.spark.sql.types.{IntegerType, StructField} import org.junit.jupiter.api.{Disabled, Tag, Test} -import scala.util.Random import scala.collection.JavaConversions._ +import scala.util.Random @Tag("functional") -object SpaceCurveOptimizeBenchMark extends TestHoodieSqlBase { +object SpaceCurveOptimizeBenchmark extends TestHoodieSqlBase { def evalSkippingPercent(tableName: String, co1: String, co2: String, value1: Int, value2: Int): Unit= { val sourceTableDF = spark.sql(s"select * from ${tableName}") val orderedColsTypes = Seq(StructField(co1, IntegerType), StructField(co2, IntegerType)) - val colStatsIndexTable = ColumnStatsIndexHelper - .buildColumnStatsTableFor(spark, sourceTableDF.inputFiles.toSeq, orderedColsTypes) - .collect() - .map(f => (f.getInt(1), f.getInt(2), f.getInt(4), f.getInt(5))) + val colStatsIndexTable = + buildColumnStatsTableFor(spark, sourceTableDF.inputFiles.toSeq, orderedColsTypes) + .collect() + .map(f => (f.getInt(1), f.getInt(2), f.getInt(4), f.getInt(5))) var hits = 0 for (fileStatRow <- colStatsIndexTable) { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala index 3e7adec7d59bb..fdff6928a215f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala @@ -47,6 +47,9 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase { checkExceptionContain(s"alter table $tableName drop partition (dt='2021-10-01')")( s"$tableName is a non-partitioned table that is not allowed to drop 
partition") + + // show partitions + checkAnswer(s"show partitions $tableName")(Seq.empty: _*) } test("Purge drop non-partitioned table") { @@ -71,6 +74,9 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase { checkExceptionContain(s"alter table $tableName drop partition (dt='2021-10-01') purge")( s"$tableName is a non-partitioned table that is not allowed to drop partition") + + // show partitions + checkAnswer(s"show partitions $tableName")(Seq.empty: _*) } Seq(false, true).foreach { urlencode => @@ -113,6 +119,13 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase { } checkAnswer(s"select dt from $tableName")(Seq(s"2021/10/02")) assertResult(true)(existsPath(s"${tmp.getCanonicalPath}/$tableName/$partitionPath")) + + // show partitions + if (urlencode) { + checkAnswer(s"show partitions $tableName")(Seq(PartitionPathEncodeUtils.escapePathName("2021/10/02"))) + } else { + checkAnswer(s"show partitions $tableName")(Seq("2021/10/02")) + } } } } @@ -157,6 +170,13 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase { } checkAnswer(s"select dt from $tableName")(Seq(s"2021/10/02")) assertResult(false)(existsPath(s"${tmp.getCanonicalPath}/$tableName/$partitionPath")) + + // show partitions + if (urlencode) { + checkAnswer(s"show partitions $tableName")(Seq(PartitionPathEncodeUtils.escapePathName("2021/10/02"))) + } else { + checkAnswer(s"show partitions $tableName")(Seq("2021/10/02")) + } } } } @@ -189,7 +209,10 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase { // drop 2021-10-01 partition spark.sql(s"alter table $tableName drop partition (dt='2021-10-01')") - checkAnswer(s"select id, name, ts, dt from $tableName") (Seq(2, "l4", "v1", "2021-10-02")) + checkAnswer(s"select id, name, ts, dt from $tableName")(Seq(2, "l4", "v1", "2021-10-02")) + + // show partitions + checkAnswer(s"show partitions $tableName")(Seq("dt=2021-10-02")) } Seq(false, true).foreach { hiveStyle => @@ -199,7 +222,7 @@ class TestAlterTableDropPartition 
extends TestHoodieSqlBase { val tablePath = s"${tmp.getCanonicalPath}/$tableName" import spark.implicits._ - val df = Seq((1, "z3", "v1", "2021", "10", "01"), (2, "l4", "v1", "2021", "10","02")) + val df = Seq((1, "z3", "v1", "2021", "10", "01"), (2, "l4", "v1", "2021", "10", "02")) .toDF("id", "name", "ts", "year", "month", "day") df.write.format("hudi") @@ -224,7 +247,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase { // not specified all partition column checkExceptionContain(s"alter table $tableName drop partition (year='2021', month='10')")( - "All partition columns need to be specified for Hoodie's dropping partition" + "All partition columns need to be specified for Hoodie's partition" ) // drop 2021-10-01 partition spark.sql(s"alter table $tableName drop partition (year='2021', month='10', day='01')") @@ -232,6 +255,13 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase { checkAnswer(s"select id, name, ts, year, month, day from $tableName")( Seq(2, "l4", "v1", "2021", "10", "02") ) + + // show partitions + if (hiveStyle) { + checkAnswer(s"show partitions $tableName")(Seq("year=2021/month=10/day=02")) + } else { + checkAnswer(s"show partitions $tableName")(Seq("2021/10/02")) + } } } } @@ -243,7 +273,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase { val tablePath = s"${tmp.getCanonicalPath}/$tableName" import spark.implicits._ - val df = Seq((1, "z3", "v1", "2021", "10", "01"), (2, "l4", "v1", "2021", "10","02")) + val df = Seq((1, "z3", "v1", "2021", "10", "01"), (2, "l4", "v1", "2021", "10", "02")) .toDF("id", "name", "ts", "year", "month", "day") df.write.format("hudi") @@ -274,6 +304,13 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase { ) assertResult(false)(existsPath( s"${tmp.getCanonicalPath}/$tableName/year=2021/month=10/day=01")) + + // show partitions + if (hiveStyle) { + checkAnswer(s"show partitions $tableName")(Seq("year=2021/month=10/day=02")) + } else { + checkAnswer(s"show partitions 
$tableName")(Seq("2021/10/02")) + } } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala index f005a14d7f2d1..9c693f9626090 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala @@ -151,4 +151,51 @@ class TestDeleteTable extends TestHoodieSqlBase { } } } + + test("Test Delete Table with op upsert") { + withTempDir { tmp => + Seq("cow", "mor").foreach {tableType => + val tableName = generateTableName + // create table + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | price double, + | ts long + |) using hudi + | location '${tmp.getCanonicalPath}/$tableName' + | tblproperties ( + | type = '$tableType', + | primaryKey = 'id', + | preCombineField = 'ts', + | hoodie.datasource.write.operation = 'upsert' + | ) + """.stripMargin) + // insert data to table + spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000") + checkAnswer(s"select id, name, price, ts from $tableName")( + Seq(1, "a1", 10.0, 1000) + ) + + // delete data from table + spark.sql(s"delete from $tableName where id = 1") + checkAnswer(s"select count(1) from $tableName") ( + Seq(0) + ) + + spark.sql(s"insert into $tableName select 2, 'a2', 10, 1000") + spark.sql(s"delete from $tableName where id = 1") + checkAnswer(s"select id, name, price, ts from $tableName")( + Seq(2, "a2", 10.0, 1000) + ) + + spark.sql(s"delete from $tableName") + checkAnswer(s"select count(1) from $tableName")( + Seq(0) + ) + } + } + } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala index ca3919599b6fa..d1f373db99e51 100644 --- 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala @@ -102,6 +102,22 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll { assertResult(expects.map(row => Row(row: _*)).toArray.sortBy(_.toString()))(spark.sql(sql).collect().sortBy(_.toString())) } + protected def checkAnswer(array: Array[Row])(expects: Seq[Any]*): Unit = { + assertResult(expects.map(row => Row(row: _*)).toArray)(array) + } + + protected def checkExceptions(sql: String)(errorMsgs: Seq[String]): Unit = { + var hasException = false + try { + spark.sql(sql) + } catch { + case e: Throwable => + assertResult(errorMsgs.contains(e.getMessage.split("\n")(0)))(true) + hasException = true + } + assertResult(true)(hasException) + } + protected def checkException(sql: String)(errorMsg: String): Unit = { var hasException = false try { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala index b186381c25203..3141208db121e 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala @@ -630,4 +630,37 @@ class TestInsertTable extends TestHoodieSqlBase { } } } + + test("Test enable hoodie.datasource.write.drop.partition.columns when write") { + spark.sql("set hoodie.sql.bulk.insert.enable = false") + Seq("mor", "cow").foreach { tableType => + withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + | create table $tableName ( + | id int, + | name string, + | price double, + | ts long, + | dt string + | ) using hudi + | partitioned by (dt) + | location '${tmp.getCanonicalPath}/$tableName' + | tblproperties ( + | primaryKey = 'id', + 
| preCombineField = 'ts', + | type = '$tableType', + | hoodie.datasource.write.drop.partition.columns = 'true' + | ) + """.stripMargin) + spark.sql(s"insert into $tableName partition(dt='2021-12-25') values (1, 'a1', 10, 1000)") + spark.sql(s"insert into $tableName partition(dt='2021-12-25') values (2, 'a2', 20, 1000)") + checkAnswer(s"select id, name, price, ts, dt from $tableName")( + Seq(1, "a1", 10, 1000, "2021-12-25"), + Seq(2, "a2", 20, 1000, "2021-12-25") + ) + } + } + } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala new file mode 100644 index 0000000000000..ae828ed9f7305 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala @@ -0,0 +1,472 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hudi + +import org.apache.hadoop.fs.Path +import org.apache.hudi.common.model.HoodieRecord +import org.apache.hudi.config.{HoodieClusteringConfig, HoodieWriteConfig} +import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkUtils} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession} + +import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ + +class TestSpark3DDL extends TestHoodieSqlBase { + + def createTestResult(tableName: String): Array[Row] = { + spark.sql(s"select * from ${tableName} order by id") + .drop("_hoodie_commit_time", "_hoodie_commit_seqno", "_hoodie_record_key", "_hoodie_partition_path", "_hoodie_file_name").collect() + } + + def createAndPreparePartitionTable(spark: SparkSession, tableName: String, tablePath: String, tableType: String): Unit = { + // try to clean tablePath + spark.sql( + s""" + |create table $tableName ( + | id int, comb int, col0 int, col1 bigint, col2 float, col3 double, col4 decimal(10,4), col5 string, col6 date, col7 timestamp, col8 boolean, col9 binary, par date + |) using hudi + | location '$tablePath' + | options ( + | type = '$tableType', + | primaryKey = 'id', + | preCombineField = 'comb' + | ) + | partitioned by (par) + """.stripMargin) + spark.sql( + s""" + | insert into $tableName values + | (1,1,11,100001,101.01,1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'), + | (2,2,12,100002,102.02,1002.0002,100002.0002,'a000002','2021-12-25','2021-12-25 12:02:02',true,'a02','2021-12-25'), + | (3,3,13,100003,103.03,1003.0003,100003.0003,'a000003','2021-12-25','2021-12-25 12:03:03',false,'a03','2021-12-25'), + | (4,4,14,100004,104.04,1004.0004,100004.0004,'a000004','2021-12-26','2021-12-26 12:04:04',true,'a04','2021-12-26'), + | (5,5,15,100005,105.05,1005.0005,100005.0005,'a000005','2021-12-26','2021-12-26 12:05:05',false,'a05','2021-12-26') + 
|""".stripMargin) + } + + test("Test multi change data type") { + withTempDir { tmp => + Seq("cow", "mor").foreach { tableType => + val tableName = generateTableName + val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" + if (HoodieSparkUtils.gteqSpark3_1) { + spark.sql("set hoodie.schema.on.read.enable=true") + createAndPreparePartitionTable(spark, tableName, tablePath, tableType) + // date -> string -> date + spark.sql(s"alter table $tableName alter column col6 type String") + checkAnswer(spark.sql(s"select col6 from $tableName where id = 1").collect())( + Seq("2021-12-25") + ) + spark.sql( + s""" + | insert into $tableName values + | (1,1,13.0,100001,101.01,1001.0001,100001.0001,'a000001','2021-12-26','2021-12-25 12:01:01',true,'a01','2021-12-25') + |""".stripMargin) + spark.sql(s"alter table $tableName alter column col6 type date") + checkAnswer(spark.sql(s"select col6 from $tableName where id = 1 or id = 5 order by id").collect())( + Seq(java.sql.Date.valueOf("2021-12-26")), // value from new file + Seq(java.sql.Date.valueOf("2021-12-26")) // value from old file + ) + // int -> double -> decimal + spark.sql(s"alter table $tableName alter column col0 type double") + spark.sql( + s""" + | insert into $tableName values + | (1,1,13.0,100001,101.01,1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'), + | (6,1,14.0,100001,101.01,1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25') + |""".stripMargin) + spark.sql(s"alter table $tableName alter column col0 type decimal(16, 4)") + checkAnswer(spark.sql(s"select col0 from $tableName where id = 1 or id = 6 order by id").collect())( + Seq(new java.math.BigDecimal("13.0000")), + Seq(new java.math.BigDecimal("14.0000")) + ) + // float -> double -> decimal + spark.sql(s"alter table $tableName alter column col2 type double") + spark.sql( + s""" + | insert into $tableName values + | 
(1,1,13.0,100001,901.01,1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'), + | (6,1,14.0,100001,601.01,1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25') + |""".stripMargin) + spark.sql(s"alter table $tableName alter column col2 type decimal(16, 4)") + checkAnswer(spark.sql(s"select col0, col2 from $tableName where id = 1 or id = 6 order by id").collect())( + Seq(new java.math.BigDecimal("13.0000"), new java.math.BigDecimal("901.0100")), + Seq(new java.math.BigDecimal("14.0000"), new java.math.BigDecimal("601.0100")) + ) + // long -> double -> decimal + spark.sql(s"alter table $tableName alter column col1 type double") + spark.sql( + s""" + | insert into $tableName values + | (1,1,13.0,700001.0,901.01,1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25') + |""".stripMargin) + spark.sql(s"alter table $tableName alter column col1 type decimal(16, 4)") + checkAnswer(spark.sql(s"select col0, col2, col1 from $tableName where id = 1 or id = 6 order by id").collect())( + Seq(new java.math.BigDecimal("13.0000"), new java.math.BigDecimal("901.0100"), new java.math.BigDecimal("700001.0000")), + Seq(new java.math.BigDecimal("14.0000"), new java.math.BigDecimal("601.0100"), new java.math.BigDecimal("100001.0000")) + ) + spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) + spark.sessionState.catalog.refreshTable(TableIdentifier(tableName)) + } + } + } + } + + test("Test multi change data type2") { + withTempDir { tmp => + Seq("cow", "mor").foreach { tableType => + val tableName = generateTableName + val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" + if (HoodieSparkUtils.gteqSpark3_1) { + spark.sql("set hoodie.schema.on.read.enable=true") + createAndPreparePartitionTable(spark, tableName, tablePath, tableType) + // float -> double -> decimal -> String + spark.sql(s"alter table $tableName alter column col2 
type double") + spark.sql(s"alter table $tableName alter column col2 type decimal(16, 4)") + spark.sql(s"alter table $tableName alter column col2 type String") + checkAnswer(spark.sql(s"select col2 from $tableName where id = 1").collect())( + Seq("101.01") + ) + // long -> double -> decimal -> string + spark.sql(s"alter table $tableName alter column col1 type double") + spark.sql(s"alter table $tableName alter column col1 type decimal(16, 4)") + spark.sql(s"alter table $tableName alter column col1 type String") + checkAnswer(spark.sql(s"select col1 from $tableName where id = 1").collect())( + Seq("100001") + ) + // int -> double -> decimal -> String + spark.sql(s"alter table $tableName alter column col0 type double") + spark.sql(s"alter table $tableName alter column col0 type decimal(16, 4)") + spark.sql(s"alter table $tableName alter column col0 type String") + checkAnswer(spark.sql(s"select col0 from $tableName where id = 1").collect())( + Seq("11") + ) + } + } + } + } + + test("Test Partition Table alter ") { + withTempDir { tmp => + Seq("cow", "mor").foreach { tableType => + val tableName = generateTableName + val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" + if (HoodieSparkUtils.gteqSpark3_1) { + spark.sql("set hoodie.schema.on.read.enable=true") + createAndPreparePartitionTable(spark, tableName, tablePath, tableType) + + // test set properties + spark.sql(s"alter table $tableName set tblproperties(comment='it is a hudi table', 'key1'='value1', 'key2'='value2')") + val meta = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) + assert(meta.comment.get.equals("it is a hudi table")) + assert(Seq("key1", "key2").filter(meta.properties.contains(_)).size == 2) + // test unset propertes + spark.sql(s"alter table $tableName unset tblproperties(comment, 'key1', 'key2')") + val unsetMeta = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) + assert(Seq("key1", 
"key2").filter(unsetMeta.properties.contains(_)).size == 0) + assert(unsetMeta.comment.isEmpty) + // test forbidden operation. + checkException(s"Alter table $tableName add columns(col_new1 int first)")("forbid adjust top-level columns position by using through first syntax") + HoodieRecord.HOODIE_META_COLUMNS.subList(0, HoodieRecord.HOODIE_META_COLUMNS.size - 2).asScala.foreach {f => + checkException(s"Alter table $tableName add columns(col_new1 int after $f)")("forbid adjust the position of ordinary columns between meta columns") + } + Seq("id", "comb", "par").foreach { col => + checkException(s"alter table $tableName drop column $col")("cannot support apply changes for primaryKey/CombineKey/partitionKey") + checkException(s"alter table $tableName rename column $col to ${col + col}")("cannot support apply changes for primaryKey/CombineKey/partitionKey") + } + // check duplicate add or rename + // keep consistent with hive, column names insensitive + checkExceptions(s"alter table $tableName rename column col0 to col9")(Seq("cannot rename column: col0 to a existing name", + "Cannot rename column, because col9 already exists in root")) + checkExceptions(s"alter table $tableName rename column col0 to COL9")(Seq("cannot rename column: col0 to a existing name", "Cannot rename column, because COL9 already exists in root")) + checkExceptions(s"alter table $tableName add columns(col9 int first)")(Seq("cannot add column: col9 which already exist", "Cannot add column, because col9 already exists in root")) + checkExceptions(s"alter table $tableName add columns(COL9 int first)")(Seq("cannot add column: COL9 which already exist", "Cannot add column, because COL9 already exists in root")) + // test add comment for columns / alter columns comment + spark.sql(s"alter table $tableName add columns(col1_new int comment 'add new columns col1_new after id' after id)") + spark.sql(s"alter table $tableName alter column col9 comment 'col9 desc'") + val schema = 
spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).schema + assert(schema.filter(p => p.name.equals("col1_new")).get(0).getComment().get == "add new columns col1_new after id") + assert(schema.filter(p => p.name.equals("col9")).get(0).getComment().get == "col9 desc") + // test change column type float to double + spark.sql(s"alter table $tableName alter column col2 type double") + spark.sql(s"select id, col1_new, col2 from $tableName where id = 1 or id = 2 order by id").show(false) + spark.sql( + s""" + | insert into $tableName values + | (1,3,1,11,100001,101.01,1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'), + | (6,6,5,15,100005,105.05,1005.0005,100005.0005,'a000005','2021-12-26','2021-12-26 12:05:05',false,'a05','2021-12-26') + |""".stripMargin) + + spark.sql(s"select id, col1_new, col2 from $tableName where id = 1 or id = 6 or id = 2 order by id").show(false) + // try schedule compact + if (tableType == "mor") spark.sql(s"schedule compaction on $tableName") + // test change column type decimal(10,4) 为decimal(18,8) + spark.sql(s"alter table $tableName alter column col4 type decimal(18, 8)") + spark.sql(s"select id, col1_new, col2 from $tableName where id = 1 or id = 2 order by id").show(false) + spark.sql( + s""" + | insert into $tableName values + | (5,6,5,15,100005,105.05,1005.0005,100005.0005,'a000005','2021-12-26','2021-12-26 12:05:05',false,'a05','2021-12-26') + |""".stripMargin) + + spark.sql(s"select id, col1_new, col4 from $tableName where id = 1 or id = 6 or id = 2 order by id").show(false) + // test change column type float to double + spark.sql(s"alter table $tableName alter column col2 type string") + spark.sql(s"select id, col1_new, col2 from $tableName where id = 1 or id = 2 order by id").show(false) + spark.sql( + s""" + | insert into $tableName values + | (1,3,1,11,100001,'101.01',1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'), + | 
(6,6,5,15,100005,'105.05',1005.0005,100005.0005,'a000005','2021-12-26','2021-12-26 12:05:05',false,'a05','2021-12-26') + |""".stripMargin) + + spark.sql(s"select id, col1_new, col2 from $tableName where id = 1 or id = 6 or id = 2 order by id").show(false) + // try schedule compact + if (tableType == "mor") spark.sql(s"schedule compaction on $tableName") + // if tableType is mor, check compaction + if (tableType == "mor") { + val compactionRows = spark.sql(s"show compaction on $tableName limit 10").collect() + val timestamps = compactionRows.map(_.getString(0)) + assertResult(2)(timestamps.length) + spark.sql(s"run compaction on $tableName at ${timestamps(1)}") + spark.sql(s"run compaction on $tableName at ${timestamps(0)}") + } + spark.sql( + s""" + | insert into $tableName values + | (1,3,1,11,100001,'101.01',1001.0001,100009.0001,'a000008','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'), + | (11,3,1,11,100001,'101.01',1001.0001,100011.0001,'a000008','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'), + | (6,6,5,15,100005,'105.05',1005.0005,100007.0005,'a000009','2021-12-26','2021-12-26 12:05:05',false,'a05','2021-12-26') + |""".stripMargin) + + spark.sql(s"select id, col1_new, col2 from $tableName where id = 1 or id = 6 or id = 2 or id = 11 order by id").show(false) + } + } + } + } + + test("Test Chinese table ") { + withTempDir { tmp => + Seq("cow", "mor").foreach { tableType => + val tableName = generateTableName + val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" + if (HoodieSparkUtils.gteqSpark3_1) { + spark.sql("set hoodie.schema.on.read.enable=true") + spark.sql( + s""" + |create table $tableName ( + | id int, comb int, `名字` string, col9 string, `成绩` int, `身高` float, `体重` double, `上次更新时间` date, par date + |) using hudi + | location '$tablePath' + | options ( + | type = '$tableType', + | primaryKey = 'id', + | preCombineField = 'comb' + | ) + | partitioned by (par) + """.stripMargin) + spark.sql( + s""" + | 
insert into $tableName values + | (1,3,'李明', '读书', 100,180.0001,99.0001,'2021-12-25', '2021-12-26') + |""".stripMargin) + spark.sql(s"alter table $tableName rename column col9 to `爱好_Best`") + + // update current table to produce log files for mor + spark.sql( + s""" + | insert into $tableName values + | (1,3,'李明', '读书', 100,180.0001,99.0001,'2021-12-26', '2021-12-26') + |""".stripMargin) + + // alter date to string + spark.sql(s"alter table $tableName alter column `上次更新时间` type string ") + checkAnswer(spark.sql(s"select `上次更新时间` from $tableName").collect())( + Seq("2021-12-26") + ) + // alter string to date + spark.sql(s"alter table $tableName alter column `上次更新时间` type date ") + spark.sql(s"select `上次更新时间` from $tableName").collect() + checkAnswer(spark.sql(s"select `上次更新时间` from $tableName").collect())( + Seq(java.sql.Date.valueOf("2021-12-26")) + ) + } + } + } + } + + + test("Test Alter Table") { + withTempDir { tmp => + Seq("cow", "mor").foreach { tableType => + val tableName = generateTableName + val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" + if (HoodieSparkUtils.gteqSpark3_1) { + spark.sql("set hoodie.schema.on.read.enable=true") + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | price double, + | ts long + |) using hudi + | location '$tablePath' + | options ( + | type = '$tableType', + | primaryKey = 'id', + | preCombineField = 'ts' + | ) + """.stripMargin) + spark.sql(s"show create table ${tableName}").show(false) + spark.sql(s"insert into ${tableName} values (1, 'jack', 0.9, 1000)") + spark.sql(s"update ${tableName} set price = 1.9 where id = 1") + + spark.sql(s"alter table ${tableName} alter column id type long") + checkAnswer(createTestResult(tableName))( + Seq(1, "jack", 1.9, 1000) + ) + // test add action, include position change + spark.sql(s"alter table ${tableName} add columns(ext1 string comment 'add ext1' after name)") + spark.sql(s"insert into ${tableName} values (2, 'jack', 
'exx1', 0.9, 1000)") + checkAnswer(createTestResult(tableName))( + Seq(1, "jack", null, 1.9, 1000), Seq(2, "jack","exx1", 0.9, 1000) + ) + // test rename + spark.sql(s"alter table ${tableName} rename column price to newprice") + checkAnswer(createTestResult(tableName))( + Seq(1, "jack", null, 1.9, 1000), Seq(2, "jack","exx1", 0.9, 1000) + ) + spark.sql(s"update ${tableName} set ext1 = 'haha' where id = 1 ") + checkAnswer(createTestResult(tableName))( + Seq(1, "jack", "haha", 1.9, 1000), Seq(2, "jack","exx1", 0.9, 1000) + ) + // drop column newprice + + spark.sql(s"alter table ${tableName} drop column newprice") + checkAnswer(createTestResult(tableName))( + Seq(1, "jack", "haha", 1000), Seq(2, "jack","exx1", 1000) + ) + // add newprice back + spark.sql(s"alter table ${tableName} add columns(newprice string comment 'add newprice back' after ext1)") + checkAnswer(createTestResult(tableName))( + Seq(1, "jack", "haha", null, 1000), Seq(2, "jack","exx1", null, 1000) + ) + } + } + } + } + + test("Test Alter Table complex") { + withTempDir { tmp => + Seq("cow", "mor").foreach { tableType => + val tableName = generateTableName + val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" + if (HoodieSparkUtils.gteqSpark3_1) { + spark.sql("set hoodie.schema.on.read.enable=true") + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | members map>, + | user struct, + | ts long + |) using hudi + | location '$tablePath' + | options ( + | type = '$tableType', + | primaryKey = 'id', + | preCombineField = 'ts' + | ) + """.stripMargin) + + spark.sql(s"alter table $tableName alter column members.value.a first") + + spark.sql(s"insert into ${tableName} values(1, 'jack', map('k1', struct('v1', 100), 'k2', struct('v2', 200)), struct('jackStruct', 29, 100), 1000)") + + // rename column + spark.sql(s"alter table ${tableName} rename column user to userx") + + checkAnswer(spark.sql(s"select ts, userx.score, id, userx.age, name from 
${tableName}").collect())( + Seq(1000, 100, 1, 29, "jack") + ) + + // drop column + spark.sql(s"alter table ${tableName} drop columns(name, userx.name, userx.score)") + + spark.sql(s"select * from ${tableName}").show(false) + + // add cols back, and adjust cols position + spark.sql(s"alter table ${tableName} add columns(name string comment 'add name back' after userx," + + s" userx.name string comment 'add userx.name back' first, userx.score int comment 'add userx.score back' after age)") + + // query new columns: name, userx.name, userx.score, those field should not be readed. + checkAnswer(spark.sql(s"select name, userx.name, userx.score from ${tableName}").collect())(Seq(null, null, null)) + + // insert again + spark.sql(s"insert into ${tableName} values(2 , map('k1', struct('v1', 100), 'k2', struct('v2', 200)), struct('jackStructNew', 291 , 101), 'jacknew', 1000)") + + // check again + checkAnswer(spark.sql(s"select name, userx.name as uxname, userx.score as uxs from ${tableName} order by id").collect())( + Seq(null, null, null), + Seq("jacknew", "jackStructNew", 101)) + + + spark.sql(s"alter table ${tableName} alter column userx.age type long") + + spark.sql(s"select userx.age, id, name from ${tableName}") + checkAnswer(spark.sql(s"select userx.age, id, name from ${tableName} order by id").collect())( + Seq(29, 1, null), + Seq(291, 2, "jacknew")) + // test map value type change + spark.sql(s"alter table ${tableName} add columns(mxp map)") + spark.sql(s"insert into ${tableName} values(2 , map('k1', struct('v1', 100), 'k2', struct('v2', 200)), struct('jackStructNew', 291 , 101), 'jacknew', 1000, map('t1', 9))") + spark.sql(s"alter table ${tableName} alter column mxp.value type double") + spark.sql(s"insert into ${tableName} values(2 , map('k1', struct('v1', 100), 'k2', struct('v2', 200)), struct('jackStructNew', 291 , 101), 'jacknew', 1000, map('t1', 10))") + spark.sql(s"select * from $tableName").show(false) + checkAnswer(spark.sql(s"select mxp from 
${tableName} order by id").collect())( + Seq(null), + Seq(Map("t1" -> 10.0d)) + ) + } + } + } + } + + private def performClustering(writeDf: DataFrame, basePath: String, tableName: String, tableType: String): Unit = { + writeDf.write.format("org.apache.hudi") + .option(DataSourceWriteOptions.TABLE_TYPE.key(), tableType) + .option("hoodie.upsert.shuffle.parallelism", "1") + .option(DataSourceWriteOptions.RECORDKEY_FIELD.key(), "id") + .option(DataSourceWriteOptions.PRECOMBINE_FIELD.key(), "comb") + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key(), "par") + .option(HoodieWriteConfig.TBL_NAME.key, tableName) + .option("hoodie.schema.on.read.enable", "true") + // option for clustering + .option("hoodie.clustering.inline", "true") + .option("hoodie.clustering.inline.max.commits", "1") + .option("hoodie.clustering.plan.strategy.small.file.limit", String.valueOf(2*1024*1024L)) + .option("hoodie.clustering.plan.strategy.max.bytes.per.group", String.valueOf(10*1024*1024L)) + .option("hoodie.clustering.plan.strategy.target.file.max.bytes", String.valueOf(4 * 1024* 1024L)) + .option(HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS.key, "col1, col2") + .mode(SaveMode.Append) + .save(basePath) + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTruncateTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTruncateTable.scala index 6a0f0a4062166..a61d0f822cf45 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTruncateTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTruncateTable.scala @@ -18,9 +18,14 @@ package org.apache.spark.sql.hudi +import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.keygen.{ComplexKeyGenerator, SimpleKeyGenerator} +import org.apache.spark.sql.SaveMode + class TestTruncateTable extends TestHoodieSqlBase { - 
test("Test Truncate Table") { + test("Test Truncate non-partitioned Table") { Seq("cow", "mor").foreach { tableType => val tableName = generateTableName // Create table @@ -51,4 +56,95 @@ class TestTruncateTable extends TestHoodieSqlBase { ) } } + + Seq(false, true).foreach { urlencode => + test(s"Test Truncate single-partition table' partitions, urlencode: $urlencode") { + withTempDir { tmp => + val tableName = generateTableName + val tablePath = s"${tmp.getCanonicalPath}/$tableName" + + import spark.implicits._ + val df = Seq((1, "z3", "v1", "2021/10/01"), (2, "l4", "v1", "2021/10/02")) + .toDF("id", "name", "ts", "dt") + + df.write.format("hudi") + .option(HoodieWriteConfig.TBL_NAME.key, tableName) + .option(TABLE_TYPE.key, MOR_TABLE_TYPE_OPT_VAL) + .option(RECORDKEY_FIELD.key, "id") + .option(PRECOMBINE_FIELD.key, "ts") + .option(PARTITIONPATH_FIELD.key, "dt") + .option(URL_ENCODE_PARTITIONING.key(), urlencode) + .option(KEYGENERATOR_CLASS_NAME.key, classOf[SimpleKeyGenerator].getName) + .option(HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key, "1") + .option(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "1") + .mode(SaveMode.Overwrite) + .save(tablePath) + + // register meta to spark catalog by creating table + spark.sql( + s""" + |create table $tableName using hudi + |location '$tablePath' + |""".stripMargin) + + // truncate 2021-10-01 partition + spark.sql(s"truncate table $tableName partition (dt='2021/10/01')") + + checkAnswer(s"select dt from $tableName")(Seq(s"2021/10/02")) + + // Truncate table + spark.sql(s"truncate table $tableName") + checkAnswer(s"select count(1) from $tableName")(Seq(0)) + } + } + } + + Seq(false, true).foreach { hiveStyle => + test(s"Test Truncate multi-level partitioned table's partitions, isHiveStylePartitioning: $hiveStyle") { + withTempDir { tmp => + val tableName = generateTableName + val tablePath = s"${tmp.getCanonicalPath}/$tableName" + + import spark.implicits._ + val df = Seq((1, "z3", "v1", "2021", "10", "01"), (2, 
"l4", "v1", "2021", "10","02")) + .toDF("id", "name", "ts", "year", "month", "day") + + df.write.format("hudi") + .option(HoodieWriteConfig.TBL_NAME.key, tableName) + .option(TABLE_TYPE.key, COW_TABLE_TYPE_OPT_VAL) + .option(RECORDKEY_FIELD.key, "id") + .option(PRECOMBINE_FIELD.key, "ts") + .option(PARTITIONPATH_FIELD.key, "year,month,day") + .option(HIVE_STYLE_PARTITIONING.key, hiveStyle) + .option(KEYGENERATOR_CLASS_NAME.key, classOf[ComplexKeyGenerator].getName) + .option(HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key, "1") + .option(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "1") + .mode(SaveMode.Overwrite) + .save(tablePath) + + // register meta to spark catalog by creating table + spark.sql( + s""" + |create table $tableName using hudi + |location '$tablePath' + |""".stripMargin) + + // not specified all partition column + checkExceptionContain(s"truncate table $tableName partition (year='2021', month='10')")( + "All partition columns need to be specified for Hoodie's partition" + ) + + // truncate 2021-10-01 partition + spark.sql(s"truncate table $tableName partition (year='2021', month='10', day='01')") + + checkAnswer(s"select id, name, ts, year, month, day from $tableName")( + Seq(2, "l4", "v1", "2021", "10", "02") + ) + + // Truncate table + spark.sql(s"truncate table $tableName") + checkAnswer(s"select count(1) from $tableName")(Seq(0)) + } + } + } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallCommandParser.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallCommandParser.scala index e26e6617f1871..87814763bf4d3 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallCommandParser.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallCommandParser.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.hudi.procedure import 
com.google.common.collect.ImmutableList +import org.apache.hudi.HoodieSparkUtils import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.plans.logical.{CallCommand, NamedArgument, PositionalArgument} import org.apache.spark.sql.hudi.TestHoodieSqlBase @@ -40,7 +41,13 @@ class TestCallCommandParser extends TestHoodieSqlBase { checkArg(call, 2, 3L, DataTypes.LongType) checkArg(call, 3, true, DataTypes.BooleanType) checkArg(call, 4, 1.0D, DataTypes.DoubleType) - checkArg(call, 5, new BigDecimal("9.0e1"), DataTypes.createDecimalType(2, 0)) + + if (HoodieSparkUtils.isSpark2) { + checkArg(call, 5, 9.0e1, DataTypes.createDecimalType(2, 0)) + } else { + checkArg(call, 5, 9.0e1, DataTypes.DoubleType) + } + checkArg(call, 6, new BigDecimal("900e-1"), DataTypes.createDecimalType(3, 1)) } @@ -108,7 +115,7 @@ class TestCallCommandParser extends TestHoodieSqlBase { assertResult(expectedExpr.dataType)(actualExpr.dataType) } - private def toSparkLiteral(value: Any, dataType: DataType) = Literal.apply(value, dataType) + private def toSparkLiteral(value: Any, dataType: DataType) = Literal.create(value, dataType) private def checkCast[T](value: Any, expectedClass: Class[T]) = { assertResult(true)(expectedClass.isInstance(value)) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRunClusteringProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestClusteringProcedure.scala similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRunClusteringProcedure.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestClusteringProcedure.scala index 068cd65387057..6214117233467 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRunClusteringProcedure.scala +++ 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestClusteringProcedure.scala @@ -23,11 +23,12 @@ import org.apache.hadoop.fs.Path import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieTimeline} import org.apache.hudi.common.util.{Option => HOption} import org.apache.hudi.{HoodieCLIUtils, HoodieDataSourceHelpers} + import org.apache.spark.sql.hudi.TestHoodieSqlBase import scala.collection.JavaConverters.asScalaIteratorConverter -class TestRunClusteringProcedure extends TestHoodieSqlBase { +class TestClusteringProcedure extends TestHoodieSqlBase { test("Test Call run_clustering Procedure By Table") { withTempDir { tmp => diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCompactionProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCompactionProcedure.scala new file mode 100644 index 0000000000000..f6e6772d161b6 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCompactionProcedure.scala @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.spark.sql.hudi.procedure + +import org.apache.spark.sql.hudi.TestHoodieSqlBase + +class TestCompactionProcedure extends TestHoodieSqlBase { + + test("Test Call run_compaction Procedure by Table") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | price double, + | ts long + |) using hudi + | location '${tmp.getCanonicalPath}' + | tblproperties ( + | primaryKey ='id', + | type = 'mor', + | preCombineField = 'ts' + | ) + """.stripMargin) + spark.sql("set hoodie.parquet.max.file.size = 10000") + spark.sql(s"insert into $tableName values(1, 'a1', 10, 1000)") + spark.sql(s"insert into $tableName values(2, 'a2', 10, 1000)") + spark.sql(s"insert into $tableName values(3, 'a3', 10, 1000)") + spark.sql(s"insert into $tableName values(4, 'a4', 10, 1000)") + spark.sql(s"update $tableName set price = 11 where id = 1") + + spark.sql(s"call run_compaction(op => 'schedule', table => '$tableName')") + spark.sql(s"update $tableName set price = 12 where id = 2") + spark.sql(s"call run_compaction('schedule', '$tableName')") + val compactionRows = spark.sql(s"call show_compaction(table => '$tableName', limit => 10)").collect() + val timestamps = compactionRows.map(_.getString(0)) + assertResult(2)(timestamps.length) + + spark.sql(s"call run_compaction(op => 'run', table => '$tableName', timestamp => ${timestamps(1)})") + checkAnswer(s"select id, name, price, ts from $tableName order by id")( + Seq(1, "a1", 11.0, 1000), + Seq(2, "a2", 12.0, 1000), + Seq(3, "a3", 10.0, 1000), + Seq(4, "a4", 10.0, 1000) + ) + assertResult(1)(spark.sql(s"call show_compaction('$tableName')").collect().length) + spark.sql(s"call run_compaction(op => 'run', table => '$tableName', timestamp => ${timestamps(0)})") + checkAnswer(s"select id, name, price, ts from $tableName order by id")( + Seq(1, "a1", 11.0, 1000), + Seq(2, "a2", 12.0, 1000), + Seq(3, "a3", 10.0, 1000), + Seq(4, "a4", 10.0, 
1000) + ) + assertResult(0)(spark.sql(s"call show_compaction(table => '$tableName')").collect().length) + } + } + + test("Test Call run_compaction Procedure by Path") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | price double, + | ts long + |) using hudi + | location '${tmp.getCanonicalPath}' + | tblproperties ( + | primaryKey ='id', + | type = 'mor', + | preCombineField = 'ts' + | ) + """.stripMargin) + spark.sql("set hoodie.parquet.max.file.size = 10000") + spark.sql(s"insert into $tableName values(1, 'a1', 10, 1000)") + spark.sql(s"insert into $tableName values(2, 'a2', 10, 1000)") + spark.sql(s"insert into $tableName values(3, 'a3', 10, 1000)") + spark.sql(s"update $tableName set price = 11 where id = 1") + + spark.sql(s"call run_compaction(op => 'run', path => '${tmp.getCanonicalPath}')") + checkAnswer(s"select id, name, price, ts from $tableName order by id")( + Seq(1, "a1", 11.0, 1000), + Seq(2, "a2", 10.0, 1000), + Seq(3, "a3", 10.0, 1000) + ) + assertResult(0)(spark.sql(s"call show_compaction(path => '${tmp.getCanonicalPath}')").collect().length) + // schedule compaction first + spark.sql(s"update $tableName set price = 12 where id = 1") + spark.sql(s"call run_compaction(op=> 'schedule', path => '${tmp.getCanonicalPath}')") + + // schedule compaction second + spark.sql(s"update $tableName set price = 12 where id = 2") + spark.sql(s"call run_compaction(op => 'schedule', path => '${tmp.getCanonicalPath}')") + + // show compaction + assertResult(2)(spark.sql(s"call show_compaction(path => '${tmp.getCanonicalPath}')").collect().length) + // run compaction for all the scheduled compaction + spark.sql(s"call run_compaction(op => 'run', path => '${tmp.getCanonicalPath}')") + + checkAnswer(s"select id, name, price, ts from $tableName order by id")( + Seq(1, "a1", 12.0, 1000), + Seq(2, "a2", 12.0, 1000), + Seq(3, "a3", 10.0, 1000) + ) + assertResult(0)(spark.sql(s"call 
show_compaction(path => '${tmp.getCanonicalPath}')").collect().length) + + checkException(s"call run_compaction(op => 'run', path => '${tmp.getCanonicalPath}', timestamp => 12345L)")( + s"Compaction instant: 12345 is not found in ${tmp.getCanonicalPath}, Available pending compaction instants are: " + ) + } + } +} diff --git a/hudi-spark-datasource/hudi-spark2-common/pom.xml b/hudi-spark-datasource/hudi-spark2-common/pom.xml index 403c2fe1e9db2..1cbdf7d1d8e1a 100644 --- a/hudi-spark-datasource/hudi-spark2-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark2-common/pom.xml @@ -1,11 +1,27 @@ + hudi-spark-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 @@ -16,4 +32,4 @@ 8 - \ No newline at end of file + diff --git a/hudi-spark-datasource/hudi-spark2/pom.xml b/hudi-spark-datasource/hudi-spark2/pom.xml index 3fb6cf3dd65ba..679579ae9a5e6 100644 --- a/hudi-spark-datasource/hudi-spark2/pom.xml +++ b/hudi-spark-datasource/hudi-spark2/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-spark2_${scala.binary.version} - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-spark2_${scala.binary.version} jar @@ -203,14 +203,6 @@ true - - org.apache.spark - spark-avro_${scala.binary.version} - ${spark2.version} - provided - true - - io.netty netty diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2CatalystExpressionUtils.scala new file mode 100644 index 0000000000000..f81ff7411e237 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2CatalystExpressionUtils.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.expressions.{Add, AttributeReference, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} + +object HoodieSpark2CatalystExpressionUtils extends HoodieCatalystExpressionUtils { + + override def tryMatchAttributeOrderingPreservingTransformation(expr: Expression): Option[AttributeReference] = { + expr match { + case OrderPreservingTransformation(attrRef) => Some(attrRef) + case _ => None + } + } + + private object OrderPreservingTransformation { + def unapply(expr: Expression): Option[AttributeReference] = { + expr match { + // Date/Time Expressions + case DateFormatClass(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case DateAdd(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateSub(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateDiff(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateDiff(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + case FromUnixTime(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case 
FromUTCTimestamp(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case ParseToDate(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case ParseToTimestamp(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case ToUnixTimestamp(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case ToUTCTimestamp(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + + // String Expressions + case Lower(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Upper(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case org.apache.spark.sql.catalyst.expressions.Left(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + + // Math Expressions + // Binary + case Add(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case Add(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Multiply(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case Multiply(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Divide(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case BitwiseOr(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case BitwiseOr(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + // Unary + case Exp(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Expm1(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log10(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log1p(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log2(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case ShiftLeft(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case ShiftRight(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + + // Other + case cast @ Cast(OrderPreservingTransformation(attrRef), _, _) + if isCastPreservingOrdering(cast.child.dataType, cast.dataType) => Some(attrRef) + 
+ // Identity transformation + case attrRef: AttributeReference => Some(attrRef) + // No match + case _ => None + } + } + } + +} diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala index 54c8b912a84e6..e4b3c4010a5e1 100644 --- a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala @@ -1,3 +1,4 @@ + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -20,33 +21,39 @@ package org.apache.spark.sql.adapter import org.apache.avro.Schema import org.apache.hudi.Spark2RowSerDe import org.apache.hudi.client.utils.SparkRowSerDe -import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSerializer, HoodieSpark2AvroDeserializer, HoodieSparkAvroSerializer} +import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSchemaConverters, HoodieAvroSerializer, HoodieSpark2_4AvroDeserializer, HoodieSpark2_4AvroSerializer, HoodieSparkAvroSchemaConverters} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, Like} import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, Join, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, Spark2ParsePartitionUtil, SparkParsePartitionUtil} 
import org.apache.spark.sql.hudi.SparkAdapter import org.apache.spark.sql.hudi.parser.HoodieSpark2ExtendedSqlParser import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataType -import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.{HoodieCatalystExpressionUtils, HoodieSpark2CatalystExpressionUtils, Row, SparkSession} import scala.collection.mutable.ArrayBuffer /** - * The adapter for spark2. + * Implementation of [[SparkAdapter]] for Spark 2.4.x */ class Spark2Adapter extends SparkAdapter { - def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializer = - new HoodieSparkAvroSerializer(rootCatalystType, rootAvroType, nullable) + override def createCatalystExpressionUtils(): HoodieCatalystExpressionUtils = HoodieSpark2CatalystExpressionUtils + + override def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializer = + new HoodieSpark2_4AvroSerializer(rootCatalystType, rootAvroType, nullable) - def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializer = - new HoodieSpark2AvroDeserializer(rootAvroType, rootCatalystType) + override def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializer = + new HoodieSpark2_4AvroDeserializer(rootAvroType, rootCatalystType) + + override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters override def createSparkRowSerDe(encoder: ExpressionEncoder[Row]): SparkRowSerDe = { new Spark2RowSerDe(encoder) @@ -151,4 +158,14 @@ class Spark2Adapter extends SparkAdapter { override def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])] = { throw new IllegalStateException(s"Should not call getRelationTimeTravel for spark2") } + + override def createResolveHudiAlterTableCommand(sparkSession: SparkSession): 
Rule[LogicalPlan] = { + new Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan + } + } + + override def createHoodieParquetFileFormat(): Option[ParquetFileFormat] = { + Some(new ParquetFileFormat) + } } diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/PatchedAvroDeserializer.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala similarity index 97% rename from hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/PatchedAvroDeserializer.scala rename to hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index 8d9948c58cdd8..2e0946f1eb989 100644 --- a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/PatchedAvroDeserializer.scala +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -37,10 +37,16 @@ import scala.collection.mutable.ArrayBuffer /** * A deserializer to deserialize data in avro format to data in catalyst format. 
* - * NOTE: This is a version of {@code AvroDeserializer} impl from Spark 2.4.4 w/ the fix for SPARK-30267 + * NOTE: This code is borrowed from Spark 2.4.4 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + * + * NOTE: This is a version of [[AvroDeserializer]] impl from Spark 2.4.4 w/ the fix for SPARK-30267 * applied on top of it */ -class PatchedAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) { +class AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) { private lazy val decimalConversions = new DecimalConversion() private val converter: Any => Any = rootCatalystType match { diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala new file mode 100644 index 0000000000000..2673088f4f537 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.avro + +import java.nio.ByteBuffer + +import scala.collection.JavaConverters._ + +import org.apache.avro.{LogicalTypes, Schema} +import org.apache.avro.Conversions.DecimalConversion +import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis} +import org.apache.avro.Schema +import org.apache.avro.Schema.Type +import org.apache.avro.Schema.Type._ +import org.apache.avro.generic.GenericData.{EnumSymbol, Fixed, Record} +import org.apache.avro.generic.GenericData.Record +import org.apache.avro.util.Utf8 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, SpecificInternalRow} +import org.apache.spark.sql.types._ + +/** + * A serializer to serialize data in catalyst format to data in avro format. + * + * NOTE: This code is borrowed from Spark 2.4.4 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +class AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) { + + def serialize(catalystData: Any): Any = { + converter.apply(catalystData) + } + + private val converter: Any => Any = { + val actualAvroType = resolveNullableType(rootAvroType, nullable) + val baseConverter = rootCatalystType match { + case st: StructType => + newStructConverter(st, actualAvroType).asInstanceOf[Any => Any] + case _ => + val tmpRow = new SpecificInternalRow(Seq(rootCatalystType)) + val converter = newConverter(rootCatalystType, actualAvroType) + (data: Any) => + tmpRow.update(0, data) + converter.apply(tmpRow, 0) + } + if (nullable) { + (data: Any) => + if (data == null) { + null + } else { + baseConverter.apply(data) + } + } else { + baseConverter + } + } + + private type Converter = (SpecializedGetters, Int) => Any + + private lazy val decimalConversions = new DecimalConversion() + + private 
def newConverter(catalystType: DataType, avroType: Schema): Converter = { + (catalystType, avroType.getType) match { + case (NullType, NULL) => + (getter, ordinal) => null + case (BooleanType, BOOLEAN) => + (getter, ordinal) => getter.getBoolean(ordinal) + case (ByteType, INT) => + (getter, ordinal) => getter.getByte(ordinal).toInt + case (ShortType, INT) => + (getter, ordinal) => getter.getShort(ordinal).toInt + case (IntegerType, INT) => + (getter, ordinal) => getter.getInt(ordinal) + case (LongType, LONG) => + (getter, ordinal) => getter.getLong(ordinal) + case (FloatType, FLOAT) => + (getter, ordinal) => getter.getFloat(ordinal) + case (DoubleType, DOUBLE) => + (getter, ordinal) => getter.getDouble(ordinal) + case (d: DecimalType, FIXED) + if avroType.getLogicalType == LogicalTypes.decimal(d.precision, d.scale) => + (getter, ordinal) => + val decimal = getter.getDecimal(ordinal, d.precision, d.scale) + decimalConversions.toFixed(decimal.toJavaBigDecimal, avroType, + LogicalTypes.decimal(d.precision, d.scale)) + + case (d: DecimalType, BYTES) + if avroType.getLogicalType == LogicalTypes.decimal(d.precision, d.scale) => + (getter, ordinal) => + val decimal = getter.getDecimal(ordinal, d.precision, d.scale) + decimalConversions.toBytes(decimal.toJavaBigDecimal, avroType, + LogicalTypes.decimal(d.precision, d.scale)) + + case (StringType, ENUM) => + val enumSymbols: Set[String] = avroType.getEnumSymbols.asScala.toSet + (getter, ordinal) => + val data = getter.getUTF8String(ordinal).toString + if (!enumSymbols.contains(data)) { + throw new IncompatibleSchemaException( + "Cannot write \"" + data + "\" since it's not defined in enum \"" + + enumSymbols.mkString("\", \"") + "\"") + } + new EnumSymbol(avroType, data) + + case (StringType, STRING) => + (getter, ordinal) => new Utf8(getter.getUTF8String(ordinal).getBytes) + + case (BinaryType, FIXED) => + val size = avroType.getFixedSize() + (getter, ordinal) => + val data: Array[Byte] = getter.getBinary(ordinal) + if 
(data.length != size) { + throw new IncompatibleSchemaException( + s"Cannot write ${data.length} ${if (data.length > 1) "bytes" else "byte"} of " + + "binary data into FIXED Type with size of " + + s"$size ${if (size > 1) "bytes" else "byte"}") + } + new Fixed(avroType, data) + + case (BinaryType, BYTES) => + (getter, ordinal) => ByteBuffer.wrap(getter.getBinary(ordinal)) + + case (DateType, INT) => + (getter, ordinal) => getter.getInt(ordinal) + + case (TimestampType, LONG) => avroType.getLogicalType match { + case _: TimestampMillis => (getter, ordinal) => getter.getLong(ordinal) / 1000 + case _: TimestampMicros => (getter, ordinal) => getter.getLong(ordinal) + // For backward compatibility, if the Avro type is Long and it is not logical type, + // output the timestamp value as with millisecond precision. + case null => (getter, ordinal) => getter.getLong(ordinal) / 1000 + case other => throw new IncompatibleSchemaException( + s"Cannot convert Catalyst Timestamp type to Avro logical type ${other}") + } + + case (ArrayType(et, containsNull), ARRAY) => + val elementConverter = newConverter( + et, resolveNullableType(avroType.getElementType, containsNull)) + (getter, ordinal) => { + val arrayData = getter.getArray(ordinal) + val len = arrayData.numElements() + val result = new Array[Any](len) + var i = 0 + while (i < len) { + if (containsNull && arrayData.isNullAt(i)) { + result(i) = null + } else { + result(i) = elementConverter(arrayData, i) + } + i += 1 + } + // avro writer is expecting a Java Collection, so we convert it into + // `ArrayList` backed by the specified array without data copying. 
+ java.util.Arrays.asList(result: _*) + } + + case (st: StructType, RECORD) => + val structConverter = newStructConverter(st, avroType) + val numFields = st.length + (getter, ordinal) => structConverter(getter.getStruct(ordinal, numFields)) + + case (st: StructType, UNION) => + val unionConverter = newUnionConverter(st, avroType) + val numFields = st.length + (getter, ordinal) => unionConverter(getter.getStruct(ordinal, numFields)) + + case (MapType(kt, vt, valueContainsNull), MAP) if kt == StringType => + val valueConverter = newConverter( + vt, resolveNullableType(avroType.getValueType, valueContainsNull)) + (getter, ordinal) => + val mapData = getter.getMap(ordinal) + val len = mapData.numElements() + val result = new java.util.HashMap[String, Any](len) + val keyArray = mapData.keyArray() + val valueArray = mapData.valueArray() + var i = 0 + while (i < len) { + val key = keyArray.getUTF8String(i).toString + if (valueContainsNull && valueArray.isNullAt(i)) { + result.put(key, null) + } else { + result.put(key, valueConverter(valueArray, i)) + } + i += 1 + } + result + + case other => + throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystType to " + + s"Avro type $avroType.") + } + } + + private def newStructConverter(catalystStruct: StructType, avroStruct: Schema): InternalRow => Record = { + if (avroStruct.getType != RECORD || avroStruct.getFields.size() != catalystStruct.length) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " + + s"Avro type $avroStruct.") + } + val fieldConverters = catalystStruct.zip(avroStruct.getFields.asScala).map { + case (f1, f2) => newConverter(f1.dataType, resolveNullableType(f2.schema(), f1.nullable)) + } + val numFields = catalystStruct.length + (row: InternalRow) => + val result = new Record(avroStruct) + var i = 0 + while (i < numFields) { + if (row.isNullAt(i)) { + result.put(i, null) + } else { + result.put(i, fieldConverters(i).apply(row, i)) + } + i += 1 + 
} + result + } + + private def newUnionConverter(catalystStruct: StructType, avroUnion: Schema): InternalRow => Any = { + if (avroUnion.getType != UNION || !canMapUnion(catalystStruct, avroUnion)) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " + + s"Avro type $avroUnion.") + } + val nullable = avroUnion.getTypes.size() > 0 && avroUnion.getTypes.get(0).getType == Type.NULL + val avroInnerTypes = if (nullable) { + avroUnion.getTypes.asScala.tail + } else { + avroUnion.getTypes.asScala + } + val fieldConverters = catalystStruct.zip(avroInnerTypes).map { + case (f1, f2) => newConverter(f1.dataType, f2) + } + val numFields = catalystStruct.length + (row: InternalRow) => + var i = 0 + var result: Any = null + while (i < numFields) { + if (!row.isNullAt(i)) { + if (result != null) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " + + s"Avro union $avroUnion. Record has more than one optional values set") + } + result = fieldConverters(i).apply(row, i) + } + i += 1 + } + if (!nullable && result == null) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " + + s"Avro union $avroUnion. Record has no values set, while should have exactly one") + } + result + } + + private def canMapUnion(catalystStruct: StructType, avroStruct: Schema): Boolean = { + (avroStruct.getTypes.size() > 0 && + avroStruct.getTypes.get(0).getType == Type.NULL && + avroStruct.getTypes.size() - 1 == catalystStruct.length) || avroStruct.getTypes.size() == catalystStruct.length + } + + private def resolveNullableType(avroType: Schema, nullable: Boolean): Schema = { + if (nullable && avroType.getType != NULL) { + // Avro uses union to represent nullable type. 
+ val fields = avroType.getTypes.asScala + val actualType = fields.filter(_.getType != Type.NULL) + if (fields.length == 2 && actualType.length == 1) { + actualType.head + } else { + // This is just a normal union, not used to designate nullability + avroType + } + } else { + avroType + } + } +} diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/HoodieSpark2AvroDeserializer.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/HoodieSpark2_4AvroDeserializer.scala similarity index 73% rename from hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/HoodieSpark2AvroDeserializer.scala rename to hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/HoodieSpark2_4AvroDeserializer.scala index 2b55c6695e5b2..1c9bc88a3ad2e 100644 --- a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/HoodieSpark2AvroDeserializer.scala +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/HoodieSpark2_4AvroDeserializer.scala @@ -20,14 +20,10 @@ package org.apache.spark.sql.avro import org.apache.avro.Schema import org.apache.spark.sql.types.DataType -/** - * This is Spark 2 implementation for the [[HoodieAvroDeserializer]] leveraging [[PatchedAvroDeserializer]], - * which is just copied over version of [[AvroDeserializer]] from Spark 2.4.4 w/ SPARK-30267 being back-ported to it - */ -class HoodieSpark2AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) +class HoodieSpark2_4AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) extends HoodieAvroDeserializer { - private val avroDeserializer = new PatchedAvroDeserializer(rootAvroType, rootCatalystType) + private val avroDeserializer = new AvroDeserializer(rootAvroType, rootCatalystType) // As of Spark 3.1, this will return data wrapped with Option, so we make sure these interfaces // are aligned across Spark versions diff --git 
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieSparkAvroSerializer.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/HoodieSpark2_4AvroSerializer.scala similarity index 91% rename from hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieSparkAvroSerializer.scala rename to hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/HoodieSpark2_4AvroSerializer.scala index 4a3a7c4526dee..48009ca165163 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieSparkAvroSerializer.scala +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/HoodieSpark2_4AvroSerializer.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.avro import org.apache.avro.Schema import org.apache.spark.sql.types.DataType -class HoodieSparkAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) +class HoodieSpark2_4AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) extends HoodieAvroSerializer { val avroSerializer = new AvroSerializer(rootCatalystType, rootAvroType, nullable) diff --git a/hudi-spark-datasource/hudi-spark3-common/pom.xml b/hudi-spark-datasource/hudi-spark3-common/pom.xml index 30e7bda2e2eb9..1781e628fb690 100644 --- a/hudi-spark-datasource/hudi-spark3-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark3-common/pom.xml @@ -21,7 +21,7 @@ hudi-spark-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 @@ -172,14 +172,6 @@ true - - org.apache.spark - spark-avro_2.12 - ${spark3.version} - provided - true - - com.fasterxml.jackson.core jackson-databind diff --git a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/Spark3Adapter.scala b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala similarity index 81% rename from 
hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/Spark3Adapter.scala rename to hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala index ad338323e8193..e5f4476cc5a98 100644 --- a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/Spark3Adapter.scala +++ b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala @@ -17,42 +17,39 @@ package org.apache.spark.sql.adapter -import org.apache.avro.Schema import org.apache.hudi.Spark3RowSerDe import org.apache.hudi.client.utils.SparkRowSerDe +import org.apache.spark.SPARK_VERSION import org.apache.hudi.spark3.internal.ReflectUtil -import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSerializer, HoodieSpark3AvroDeserializer, HoodieSparkAvroSerializer} +import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, Like} import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, JoinHint, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.hudi.SparkAdapter import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.DataType import 
org.apache.spark.sql.{Row, SparkSession} /** - * The adapter for spark3. + * Base implementation of [[SparkAdapter]] for Spark 3.x branch */ -class Spark3Adapter extends SparkAdapter { - - def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializer = - new HoodieSparkAvroSerializer(rootCatalystType, rootAvroType, nullable) - - def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializer = - new HoodieSpark3AvroDeserializer(rootAvroType, rootCatalystType) +abstract class BaseSpark3Adapter extends SparkAdapter { override def createSparkRowSerDe(encoder: ExpressionEncoder[Row]): SparkRowSerDe = { new Spark3RowSerDe(encoder) } + override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters + override def toTableIdentifier(aliasId: AliasIdentifier): TableIdentifier = { aliasId match { case AliasIdentifier(name, Seq(database)) => @@ -137,4 +134,19 @@ class Spark3Adapter extends SparkAdapter { override def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])] = { throw new IllegalStateException(s"Should not call getRelationTimeTravel for spark3.1.x") } + override def createExtendedSparkParser: Option[(SparkSession, ParserInterface) => ParserInterface] = { + // since spark3.2.1 support datasourceV2, so we need to a new SqlParser to deal DDL statment + if (SPARK_VERSION.startsWith("3.1")) { + val loadClassName = "org.apache.spark.sql.parser.HoodieSpark312ExtendedSqlParser" + Some { + (spark: SparkSession, delegate: ParserInterface) => { + val clazz = Class.forName(loadClassName, true, Thread.currentThread().getContextClassLoader) + val ctor = clazz.getConstructors.head + ctor.newInstance(spark, delegate).asInstanceOf[ParserInterface] + } + } + } else { + None + } + } } diff --git a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml index 
f6d9f7d557216..bd46caaa87a5a 100644 --- a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-spark3.1.x_2.12 - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-spark3.1.x_2.12 jar @@ -46,6 +46,7 @@ -nobootcp + -target:jvm-1.8 false @@ -157,7 +158,7 @@ org.apache.spark spark-sql_2.12 - ${spark3.version} + ${spark31.version} true @@ -181,16 +182,36 @@ org.apache.hudi hudi-spark-client ${project.version} + + + org.apache.spark + * + + + org.apache.hudi hudi-spark-common_${scala.binary.version} ${project.version} + + + org.apache.spark + * + + + org.apache.hudi hudi-spark3-common ${project.version} + + + org.apache.spark + * + + @@ -202,6 +223,7 @@ test-jar test + org.apache.hudi hudi-spark-client @@ -209,7 +231,14 @@ tests test-jar test + + + org.apache.spark + * + + + org.apache.hudi hudi-common @@ -218,6 +247,7 @@ test-jar test + org.apache.hudi hudi-spark-common_${scala.binary.version} @@ -225,6 +255,12 @@ tests test-jar test + + + org.apache.spark + * + + diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark312HoodieVectorizedParquetRecordReader.java b/hudi-spark-datasource/hudi-spark3.1.x/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark312HoodieVectorizedParquetRecordReader.java new file mode 100644 index 0000000000000..3f86eeedffa47 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark312HoodieVectorizedParquetRecordReader.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet; + +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hudi.client.utils.SparkInternalSchemaConverter; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.WritableColumnVector; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.vectorized.ColumnarBatch; + +import java.io.IOException; +import java.time.ZoneId; +import java.util.HashMap; +import java.util.Map; + +public class Spark312HoodieVectorizedParquetRecordReader extends VectorizedParquetRecordReader { + + // save the col type change info. + private Map> typeChangeInfos; + + private ColumnarBatch columnarBatch; + + private Map idToColumnVectors; + + private WritableColumnVector[] columnVectors; + + // The capacity of vectorized batch. + private int capacity; + + // If true, this class returns batches instead of rows. + private boolean returnColumnarBatch; + + // The memory mode of the columnarBatch. 
+ private final MemoryMode memoryMode; + + /** + * Batch of rows that we assemble and the current index we've returned. Every time this + * batch is used up (batchIdx == numBatched), we populated the batch. + */ + private int batchIdx = 0; + private int numBatched = 0; + + public Spark312HoodieVectorizedParquetRecordReader( + ZoneId convertTz, + String datetimeRebaseMode, + String int96RebaseMode, + boolean useOffHeap, + int capacity, + Map> typeChangeInfos) { + super(convertTz, datetimeRebaseMode, int96RebaseMode, useOffHeap, capacity); + memoryMode = useOffHeap ? MemoryMode.OFF_HEAP : MemoryMode.ON_HEAP; + this.typeChangeInfos = typeChangeInfos; + this.capacity = capacity; + } + + @Override + public void initBatch(StructType partitionColumns, InternalRow partitionValues) { + super.initBatch(partitionColumns, partitionValues); + if (columnVectors == null) { + columnVectors = new WritableColumnVector[sparkSchema.length() + partitionColumns.length()]; + } + if (idToColumnVectors == null) { + idToColumnVectors = new HashMap<>(); + typeChangeInfos.entrySet() + .stream() + .forEach(f -> { + WritableColumnVector vector = + memoryMode == MemoryMode.OFF_HEAP ? 
new OffHeapColumnVector(capacity, f.getValue().getLeft()) : new OnHeapColumnVector(capacity, f.getValue().getLeft()); + idToColumnVectors.put(f.getKey(), vector); + }); + } + } + + @Override + public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException, UnsupportedOperationException { + super.initialize(inputSplit, taskAttemptContext); + } + + @Override + public void close() throws IOException { + super.close(); + for (Map.Entry e : idToColumnVectors.entrySet()) { + e.getValue().close(); + } + idToColumnVectors = null; + columnarBatch = null; + columnVectors = null; + } + + @Override + public ColumnarBatch resultBatch() { + ColumnarBatch currentColumnBatch = super.resultBatch(); + boolean changed = false; + for (Map.Entry> entry : typeChangeInfos.entrySet()) { + boolean rewrite = SparkInternalSchemaConverter + .convertColumnVectorType((WritableColumnVector) currentColumnBatch.column(entry.getKey()), + idToColumnVectors.get(entry.getKey()), currentColumnBatch.numRows()); + if (rewrite) { + changed = true; + columnVectors[entry.getKey()] = idToColumnVectors.get(entry.getKey()); + } + } + if (changed) { + if (columnarBatch == null) { + // fill other vector + for (int i = 0; i < columnVectors.length; i++) { + if (columnVectors[i] == null) { + columnVectors[i] = (WritableColumnVector) currentColumnBatch.column(i); + } + } + columnarBatch = new ColumnarBatch(columnVectors); + } + columnarBatch.setNumRows(currentColumnBatch.numRows()); + return columnarBatch; + } else { + return currentColumnBatch; + } + } + + @Override + public boolean nextBatch() throws IOException { + boolean result = super.nextBatch(); + if (idToColumnVectors != null) { + idToColumnVectors.entrySet().stream().forEach(e -> e.getValue().reset()); + } + numBatched = resultBatch().numRows(); + batchIdx = 0; + return result; + } + + @Override + public void enableReturningBatches() { + returnColumnarBatch = true; + 
super.enableReturningBatches(); + } + + @Override + public Object getCurrentValue() { + if (typeChangeInfos == null || typeChangeInfos.isEmpty()) { + return super.getCurrentValue(); + } + + if (returnColumnarBatch) { + return columnarBatch == null ? super.getCurrentValue() : columnarBatch; + } + + return columnarBatch == null ? super.getCurrentValue() : columnarBatch.getRow(batchIdx - 1); + } + + @Override + public boolean nextKeyValue() throws IOException { + resultBatch(); + + if (returnColumnarBatch) { + return nextBatch(); + } + + if (batchIdx >= numBatched) { + if (!nextBatch()) { + return false; + } + } + ++batchIdx; + return true; + } +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark3_1CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark3_1CatalystExpressionUtils.scala new file mode 100644 index 0000000000000..3e65123636fc9 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark3_1CatalystExpressionUtils.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.expressions.{Add, AttributeReference, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} + +object HoodieSpark3_1CatalystExpressionUtils extends HoodieCatalystExpressionUtils { + + override def tryMatchAttributeOrderingPreservingTransformation(expr: Expression): Option[AttributeReference] = { + expr match { + case OrderPreservingTransformation(attrRef) => Some(attrRef) + case _ => None + } + } + + private object OrderPreservingTransformation { + def unapply(expr: Expression): Option[AttributeReference] = { + expr match { + // Date/Time Expressions + case DateFormatClass(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case DateAdd(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateSub(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateDiff(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateDiff(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + case FromUnixTime(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case FromUTCTimestamp(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case ParseToDate(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case ParseToTimestamp(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case ToUnixTimestamp(OrderPreservingTransformation(attrRef), _, _, _) => Some(attrRef) + case ToUTCTimestamp(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + + // String Expressions + case Lower(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Upper(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case 
org.apache.spark.sql.catalyst.expressions.Left(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + + // Math Expressions + // Binary + case Add(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case Add(_, OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case Multiply(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case Multiply(_, OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case Divide(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case BitwiseOr(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case BitwiseOr(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + // Unary + case Exp(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Expm1(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log10(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log1p(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log2(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case ShiftLeft(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case ShiftRight(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + + // Other + case cast @ Cast(OrderPreservingTransformation(attrRef), _, _) + if isCastPreservingOrdering(cast.child.dataType, cast.dataType) => Some(attrRef) + + // Identity transformation + case attrRef: AttributeReference => Some(attrRef) + // No match + case _ => None + } + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_1Adapter.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_1Adapter.scala new file mode 100644 index 0000000000000..13dba82488271 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_1Adapter.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.adapter + +import org.apache.avro.Schema +import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSchemaConverters, HoodieAvroSerializer, HoodieSpark3_1AvroDeserializer, HoodieSpark3_1AvroSerializer, HoodieSparkAvroSchemaConverters} +import org.apache.spark.sql.hudi.SparkAdapter +import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.{HoodieCatalystExpressionUtils, HoodieSpark3_1CatalystExpressionUtils} +import org.apache.spark.SPARK_VERSION +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.{HoodieCatalystExpressionUtils, HoodieSpark3_1CatalystExpressionUtils, SparkSession} + +/** + * Implementation of [[SparkAdapter]] for Spark 3.1.x + */ +class Spark3_1Adapter extends BaseSpark3Adapter { + + override def createCatalystExpressionUtils(): HoodieCatalystExpressionUtils = HoodieSpark3_1CatalystExpressionUtils + + override def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializer = + new 
HoodieSpark3_1AvroSerializer(rootCatalystType, rootAvroType, nullable) + + override def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializer = + new HoodieSpark3_1AvroDeserializer(rootAvroType, rootCatalystType) + + override def createResolveHudiAlterTableCommand(sparkSession: SparkSession): Rule[LogicalPlan] = { + if (SPARK_VERSION.startsWith("3.1")) { + val loadClassName = "org.apache.spark.sql.hudi.ResolveHudiAlterTableCommand312" + val clazz = Class.forName(loadClassName, true, Thread.currentThread().getContextClassLoader) + val ctor = clazz.getConstructors.head + ctor.newInstance(sparkSession).asInstanceOf[Rule[LogicalPlan]] + } else { + new Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan + } + } + } + + override def createHoodieParquetFileFormat(): Option[ParquetFileFormat] = { + if (SPARK_VERSION.startsWith("3.1")) { + val loadClassName = "org.apache.spark.sql.execution.datasources.parquet.Spark312HoodieParquetFileFormat" + val clazz = Class.forName(loadClassName, true, Thread.currentThread().getContextClassLoader) + val ctor = clazz.getConstructors.head + Some(ctor.newInstance().asInstanceOf[ParquetFileFormat]) + } else { + None + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala new file mode 100644 index 0000000000000..717df0f4076ee --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -0,0 +1,493 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Conversions.DecimalConversion +import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis} +import org.apache.avro.Schema.Type._ +import org.apache.avro.generic._ +import org.apache.avro.util.Utf8 +import org.apache.avro.{LogicalTypes, Schema, SchemaBuilder} +import org.apache.spark.sql.avro.AvroDeserializer.{createDateRebaseFuncInRead, createTimestampRebaseFuncInRead} +import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData} +import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_DAY +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.catalyst.{InternalRow, NoopFilters, StructFilters} +import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +import java.math.BigDecimal +import java.nio.ByteBuffer +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +/** + * A deserializer to deserialize data in avro format to data in catalyst format. 
+ * + * NOTE: This code is borrowed from Spark 3.1.2 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +private[sql] class AvroDeserializer(rootAvroType: Schema, + rootCatalystType: DataType, + datetimeRebaseMode: LegacyBehaviorPolicy.Value, + filters: StructFilters) { + + def this(rootAvroType: Schema, rootCatalystType: DataType) = { + this( + rootAvroType, + rootCatalystType, + LegacyBehaviorPolicy.withName(SQLConf.get.getConf(SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ)), + new NoopFilters) + } + + private lazy val decimalConversions = new DecimalConversion() + + private val dateRebaseFunc = createDateRebaseFuncInRead( + datetimeRebaseMode, "Avro") + + private val timestampRebaseFunc = createTimestampRebaseFuncInRead( + datetimeRebaseMode, "Avro") + + private val converter: Any => Option[Any] = rootCatalystType match { + // A shortcut for empty schema. + case st: StructType if st.isEmpty => + (data: Any) => Some(InternalRow.empty) + + case st: StructType => + val resultRow = new SpecificInternalRow(st.map(_.dataType)) + val fieldUpdater = new RowUpdater(resultRow) + val applyFilters = filters.skipRow(resultRow, _) + val writer = getRecordWriter(rootAvroType, st, Nil, applyFilters) + (data: Any) => { + val record = data.asInstanceOf[GenericRecord] + val skipRow = writer(fieldUpdater, record) + if (skipRow) None else Some(resultRow) + } + + case _ => + val tmpRow = new SpecificInternalRow(Seq(rootCatalystType)) + val fieldUpdater = new RowUpdater(tmpRow) + val writer = newWriter(rootAvroType, rootCatalystType, Nil) + (data: Any) => { + writer(fieldUpdater, 0, data) + Some(tmpRow.get(0, rootCatalystType)) + } + } + + def deserialize(data: Any): Option[Any] = converter(data) + + /** + * Creates a writer to write avro values to Catalyst values at the given ordinal with the given + * updater. 
+ */ + private def newWriter(avroType: Schema, + catalystType: DataType, + path: List[String]): (CatalystDataUpdater, Int, Any) => Unit = + (avroType.getType, catalystType) match { + case (NULL, NullType) => (updater, ordinal, _) => + updater.setNullAt(ordinal) + + // TODO: we can avoid boxing if future version of avro provide primitive accessors. + case (BOOLEAN, BooleanType) => (updater, ordinal, value) => + updater.setBoolean(ordinal, value.asInstanceOf[Boolean]) + + case (INT, IntegerType) => (updater, ordinal, value) => + updater.setInt(ordinal, value.asInstanceOf[Int]) + + case (INT, DateType) => (updater, ordinal, value) => + updater.setInt(ordinal, dateRebaseFunc(value.asInstanceOf[Int])) + + case (LONG, LongType) => (updater, ordinal, value) => + updater.setLong(ordinal, value.asInstanceOf[Long]) + + case (LONG, TimestampType) => avroType.getLogicalType match { + // For backward compatibility, if the Avro type is Long and it is not logical type + // (the `null` case), the value is processed as timestamp type with millisecond precision. + case null | _: TimestampMillis => (updater, ordinal, value) => + val millis = value.asInstanceOf[Long] + val micros = DateTimeUtils.millisToMicros(millis) + updater.setLong(ordinal, timestampRebaseFunc(micros)) + case _: TimestampMicros => (updater, ordinal, value) => + val micros = value.asInstanceOf[Long] + updater.setLong(ordinal, timestampRebaseFunc(micros)) + case other => throw new IncompatibleSchemaException( + s"Cannot convert Avro logical type ${other} to Catalyst Timestamp type.") + } + + // Before we upgrade Avro to 1.8 for logical type support, spark-avro converts Long to Date. + // For backward compatibility, we still keep this conversion. 
+ case (LONG, DateType) => (updater, ordinal, value) => + updater.setInt(ordinal, (value.asInstanceOf[Long] / MILLIS_PER_DAY).toInt) + + case (FLOAT, FloatType) => (updater, ordinal, value) => + updater.setFloat(ordinal, value.asInstanceOf[Float]) + + case (DOUBLE, DoubleType) => (updater, ordinal, value) => + updater.setDouble(ordinal, value.asInstanceOf[Double]) + + case (STRING, StringType) => (updater, ordinal, value) => + val str = value match { + case s: String => UTF8String.fromString(s) + case s: Utf8 => + val bytes = new Array[Byte](s.getByteLength) + System.arraycopy(s.getBytes, 0, bytes, 0, s.getByteLength) + UTF8String.fromBytes(bytes) + } + updater.set(ordinal, str) + + case (ENUM, StringType) => (updater, ordinal, value) => + updater.set(ordinal, UTF8String.fromString(value.toString)) + + case (FIXED, BinaryType) => (updater, ordinal, value) => + updater.set(ordinal, value.asInstanceOf[GenericFixed].bytes().clone()) + + case (BYTES, BinaryType) => (updater, ordinal, value) => + val bytes = value match { + case b: ByteBuffer => + val bytes = new Array[Byte](b.remaining) + b.get(bytes) + bytes + case b: Array[Byte] => b + case other => throw new RuntimeException(s"$other is not a valid avro binary.") + } + updater.set(ordinal, bytes) + + case (FIXED, _: DecimalType) => (updater, ordinal, value) => + val d = avroType.getLogicalType.asInstanceOf[LogicalTypes.Decimal] + val bigDecimal = decimalConversions.fromFixed(value.asInstanceOf[GenericFixed], avroType, d) + val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) + updater.setDecimal(ordinal, decimal) + + case (BYTES, _: DecimalType) => (updater, ordinal, value) => + val d = avroType.getLogicalType.asInstanceOf[LogicalTypes.Decimal] + val bigDecimal = decimalConversions.fromBytes(value.asInstanceOf[ByteBuffer], avroType, d) + val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) + updater.setDecimal(ordinal, decimal) + + case (RECORD, st: StructType) => + // Avro datasource 
doesn't accept filters with nested attributes. See SPARK-32328. + // We can always return `false` from `applyFilters` for nested records. + val writeRecord = getRecordWriter(avroType, st, path, applyFilters = _ => false) + (updater, ordinal, value) => + val row = new SpecificInternalRow(st) + writeRecord(new RowUpdater(row), value.asInstanceOf[GenericRecord]) + updater.set(ordinal, row) + + case (ARRAY, ArrayType(elementType, containsNull)) => + val elementWriter = newWriter(avroType.getElementType, elementType, path) + (updater, ordinal, value) => + val collection = value.asInstanceOf[java.util.Collection[Any]] + val result = createArrayData(elementType, collection.size()) + val elementUpdater = new ArrayDataUpdater(result) + + var i = 0 + val iter = collection.iterator() + while (iter.hasNext) { + val element = iter.next() + if (element == null) { + if (!containsNull) { + throw new RuntimeException(s"Array value at path ${path.mkString(".")} is not " + + "allowed to be null") + } else { + elementUpdater.setNullAt(i) + } + } else { + elementWriter(elementUpdater, i, element) + } + i += 1 + } + + updater.set(ordinal, result) + + case (MAP, MapType(keyType, valueType, valueContainsNull)) if keyType == StringType => + val keyWriter = newWriter(SchemaBuilder.builder().stringType(), StringType, path) + val valueWriter = newWriter(avroType.getValueType, valueType, path) + (updater, ordinal, value) => + val map = value.asInstanceOf[java.util.Map[AnyRef, AnyRef]] + val keyArray = createArrayData(keyType, map.size()) + val keyUpdater = new ArrayDataUpdater(keyArray) + val valueArray = createArrayData(valueType, map.size()) + val valueUpdater = new ArrayDataUpdater(valueArray) + val iter = map.entrySet().iterator() + var i = 0 + while (iter.hasNext) { + val entry = iter.next() + assert(entry.getKey != null) + keyWriter(keyUpdater, i, entry.getKey) + if (entry.getValue == null) { + if (!valueContainsNull) { + throw new RuntimeException(s"Map value at path 
${path.mkString(".")} is not " + + "allowed to be null") + } else { + valueUpdater.setNullAt(i) + } + } else { + valueWriter(valueUpdater, i, entry.getValue) + } + i += 1 + } + + // The Avro map will never have null or duplicated map keys, it's safe to create a + // ArrayBasedMapData directly here. + updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray)) + + case (UNION, _) => + val allTypes = avroType.getTypes.asScala + val nonNullTypes = allTypes.filter(_.getType != NULL) + val nonNullAvroType = Schema.createUnion(nonNullTypes.asJava) + if (nonNullTypes.nonEmpty) { + if (nonNullTypes.length == 1) { + newWriter(nonNullTypes.head, catalystType, path) + } else { + nonNullTypes.map(_.getType).toSeq match { + case Seq(a, b) if Set(a, b) == Set(INT, LONG) && catalystType == LongType => + (updater, ordinal, value) => + value match { + case null => updater.setNullAt(ordinal) + case l: java.lang.Long => updater.setLong(ordinal, l) + case i: java.lang.Integer => updater.setLong(ordinal, i.longValue()) + } + + case Seq(a, b) if Set(a, b) == Set(FLOAT, DOUBLE) && catalystType == DoubleType => + (updater, ordinal, value) => + value match { + case null => updater.setNullAt(ordinal) + case d: java.lang.Double => updater.setDouble(ordinal, d) + case f: java.lang.Float => updater.setDouble(ordinal, f.doubleValue()) + } + + case _ => + catalystType match { + case st: StructType if st.length == nonNullTypes.size => + val fieldWriters = nonNullTypes.zip(st.fields).map { + case (schema, field) => newWriter(schema, field.dataType, path :+ field.name) + }.toArray + (updater, ordinal, value) => { + val row = new SpecificInternalRow(st) + val fieldUpdater = new RowUpdater(row) + val i = GenericData.get().resolveUnion(nonNullAvroType, value) + fieldWriters(i)(fieldUpdater, i, value) + updater.set(ordinal, row) + } + + case _ => + throw new IncompatibleSchemaException( + s"Cannot convert Avro to catalyst because schema at path " + + s"${path.mkString(".")} is not compatible " + 
+ s"(avroType = $avroType, sqlType = $catalystType).\n" + + s"Source Avro schema: $rootAvroType.\n" + + s"Target Catalyst type: $rootCatalystType") + } + } + } + } else { + (updater, ordinal, value) => updater.setNullAt(ordinal) + } + + case _ => + throw new IncompatibleSchemaException( + s"Cannot convert Avro to catalyst because schema at path ${path.mkString(".")} " + + s"is not compatible (avroType = $avroType, sqlType = $catalystType).\n" + + s"Source Avro schema: $rootAvroType.\n" + + s"Target Catalyst type: $rootCatalystType") + } + + // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? + private def createDecimal(decimal: BigDecimal, precision: Int, scale: Int): Decimal = { + if (precision <= Decimal.MAX_LONG_DIGITS) { + // Constructs a `Decimal` with an unscaled `Long` value if possible. + Decimal(decimal.unscaledValue().longValue(), precision, scale) + } else { + // Otherwise, resorts to an unscaled `BigInteger` instead. + Decimal(decimal, precision, scale) + } + } + + private def getRecordWriter(avroType: Schema, + sqlType: StructType, + path: List[String], + applyFilters: Int => Boolean): (CatalystDataUpdater, GenericRecord) => Boolean = { + val validFieldIndexes = ArrayBuffer.empty[Int] + val fieldWriters = ArrayBuffer.empty[(CatalystDataUpdater, Any) => Unit] + + val avroSchemaHelper = new AvroUtils.AvroSchemaHelper(avroType) + val length = sqlType.length + var i = 0 + while (i < length) { + val sqlField = sqlType.fields(i) + avroSchemaHelper.getFieldByName(sqlField.name) match { + case Some(avroField) => + validFieldIndexes += avroField.pos() + + val baseWriter = newWriter(avroField.schema(), sqlField.dataType, path :+ sqlField.name) + val ordinal = i + val fieldWriter = (fieldUpdater: CatalystDataUpdater, value: Any) => { + if (value == null) { + fieldUpdater.setNullAt(ordinal) + } else { + baseWriter(fieldUpdater, ordinal, value) + } + } + fieldWriters += fieldWriter + case None if !sqlField.nullable => + val 
fieldStr = s"${path.mkString(".")}.${sqlField.name}" + throw new IncompatibleSchemaException( + s""" + |Cannot find non-nullable field $fieldStr in Avro schema. + |Source Avro schema: $rootAvroType. + |Target Catalyst type: $rootCatalystType. + """.stripMargin) + case _ => // nothing to do + } + i += 1 + } + + (fieldUpdater, record) => { + var i = 0 + var skipRow = false + while (i < validFieldIndexes.length && !skipRow) { + fieldWriters(i)(fieldUpdater, record.get(validFieldIndexes(i))) + skipRow = applyFilters(i) + i += 1 + } + skipRow + } + } + + private def createArrayData(elementType: DataType, length: Int): ArrayData = elementType match { + case BooleanType => UnsafeArrayData.fromPrimitiveArray(new Array[Boolean](length)) + case ByteType => UnsafeArrayData.fromPrimitiveArray(new Array[Byte](length)) + case ShortType => UnsafeArrayData.fromPrimitiveArray(new Array[Short](length)) + case IntegerType => UnsafeArrayData.fromPrimitiveArray(new Array[Int](length)) + case LongType => UnsafeArrayData.fromPrimitiveArray(new Array[Long](length)) + case FloatType => UnsafeArrayData.fromPrimitiveArray(new Array[Float](length)) + case DoubleType => UnsafeArrayData.fromPrimitiveArray(new Array[Double](length)) + case _ => new GenericArrayData(new Array[Any](length)) + } + + /** + * A base interface for updating values inside catalyst data structure like `InternalRow` and + * `ArrayData`. 
+ */ + sealed trait CatalystDataUpdater { + def set(ordinal: Int, value: Any): Unit + + def setNullAt(ordinal: Int): Unit = set(ordinal, null) + + def setBoolean(ordinal: Int, value: Boolean): Unit = set(ordinal, value) + + def setByte(ordinal: Int, value: Byte): Unit = set(ordinal, value) + + def setShort(ordinal: Int, value: Short): Unit = set(ordinal, value) + + def setInt(ordinal: Int, value: Int): Unit = set(ordinal, value) + + def setLong(ordinal: Int, value: Long): Unit = set(ordinal, value) + + def setDouble(ordinal: Int, value: Double): Unit = set(ordinal, value) + + def setFloat(ordinal: Int, value: Float): Unit = set(ordinal, value) + + def setDecimal(ordinal: Int, value: Decimal): Unit = set(ordinal, value) + } + + final class RowUpdater(row: InternalRow) extends CatalystDataUpdater { + override def set(ordinal: Int, value: Any): Unit = row.update(ordinal, value) + + override def setNullAt(ordinal: Int): Unit = row.setNullAt(ordinal) + + override def setBoolean(ordinal: Int, value: Boolean): Unit = row.setBoolean(ordinal, value) + + override def setByte(ordinal: Int, value: Byte): Unit = row.setByte(ordinal, value) + + override def setShort(ordinal: Int, value: Short): Unit = row.setShort(ordinal, value) + + override def setInt(ordinal: Int, value: Int): Unit = row.setInt(ordinal, value) + + override def setLong(ordinal: Int, value: Long): Unit = row.setLong(ordinal, value) + + override def setDouble(ordinal: Int, value: Double): Unit = row.setDouble(ordinal, value) + + override def setFloat(ordinal: Int, value: Float): Unit = row.setFloat(ordinal, value) + + override def setDecimal(ordinal: Int, value: Decimal): Unit = + row.setDecimal(ordinal, value, value.precision) + } + + final class ArrayDataUpdater(array: ArrayData) extends CatalystDataUpdater { + override def set(ordinal: Int, value: Any): Unit = array.update(ordinal, value) + + override def setNullAt(ordinal: Int): Unit = array.setNullAt(ordinal) + + override def setBoolean(ordinal: Int, value: 
Boolean): Unit = array.setBoolean(ordinal, value) + + override def setByte(ordinal: Int, value: Byte): Unit = array.setByte(ordinal, value) + + override def setShort(ordinal: Int, value: Short): Unit = array.setShort(ordinal, value) + + override def setInt(ordinal: Int, value: Int): Unit = array.setInt(ordinal, value) + + override def setLong(ordinal: Int, value: Long): Unit = array.setLong(ordinal, value) + + override def setDouble(ordinal: Int, value: Double): Unit = array.setDouble(ordinal, value) + + override def setFloat(ordinal: Int, value: Float): Unit = array.setFloat(ordinal, value) + + override def setDecimal(ordinal: Int, value: Decimal): Unit = array.update(ordinal, value) + } +} + +object AvroDeserializer { + + // NOTE: Following methods have been renamed in Spark 3.1.3 [1] making [[AvroDeserializer]] implementation + // (which relies on it) be only compatible with the exact same version of [[DataSourceUtils]]. + // To make sure this implementation is compatible w/ all Spark versions w/in Spark 3.1.x branch, + // we're preemptively cloned those methods to make sure Hudi is compatible w/ Spark 3.1.2 as well as + // w/ Spark >= 3.1.3 + // + // [1] https://github.com/apache/spark/pull/34978 + + def createDateRebaseFuncInRead(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Int => Int = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => days: Int => + if (days < RebaseDateTime.lastSwitchJulianDay) { + throw DataSourceUtils.newRebaseExceptionInRead(format) + } + days + case LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseJulianToGregorianDays + case LegacyBehaviorPolicy.CORRECTED => identity[Int] + } + + def createTimestampRebaseFuncInRead(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Long => Long = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => micros: Long => + if (micros < RebaseDateTime.lastSwitchJulianTs) { + throw DataSourceUtils.newRebaseExceptionInRead(format) + } + micros + case 
LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseJulianToGregorianMicros + case LegacyBehaviorPolicy.CORRECTED => identity[Long] + } +} + diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala new file mode 100644 index 0000000000000..36d86c1e01f05 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala @@ -0,0 +1,389 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Conversions.DecimalConversion +import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis} +import org.apache.avro.{LogicalTypes, Schema} +import org.apache.avro.Schema.Type +import org.apache.avro.Schema.Type._ +import org.apache.avro.generic.GenericData.{EnumSymbol, Fixed, Record} +import org.apache.avro.util.Utf8 +import org.apache.spark.internal.Logging +import org.apache.spark.sql.avro.AvroSerializer.{createDateRebaseFuncInWrite, createTimestampRebaseFuncInWrite} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, SpecificInternalRow} +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, RebaseDateTime} +import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy +import org.apache.spark.sql.types._ + +import java.nio.ByteBuffer +import scala.collection.JavaConverters._ + +/** + * A serializer to serialize data in catalyst format to data in avro format. 
+ * + * NOTE: This code is borrowed from Spark 3.1.2 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +private[sql] class AvroSerializer(rootCatalystType: DataType, + rootAvroType: Schema, + nullable: Boolean, + datetimeRebaseMode: LegacyBehaviorPolicy.Value) extends Logging { + + def this(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) = { + this(rootCatalystType, rootAvroType, nullable, + LegacyBehaviorPolicy.withName(SQLConf.get.getConf( + SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE))) + } + + def serialize(catalystData: Any): Any = { + converter.apply(catalystData) + } + + private val dateRebaseFunc = createDateRebaseFuncInWrite( + datetimeRebaseMode, "Avro") + + private val timestampRebaseFunc = createTimestampRebaseFuncInWrite( + datetimeRebaseMode, "Avro") + + private val converter: Any => Any = { + val actualAvroType = resolveNullableType(rootAvroType, nullable) + val baseConverter = rootCatalystType match { + case st: StructType => + newStructConverter(st, actualAvroType).asInstanceOf[Any => Any] + case _ => + val tmpRow = new SpecificInternalRow(Seq(rootCatalystType)) + val converter = newConverter(rootCatalystType, actualAvroType) + (data: Any) => + tmpRow.update(0, data) + converter.apply(tmpRow, 0) + } + if (nullable) { + (data: Any) => + if (data == null) { + null + } else { + baseConverter.apply(data) + } + } else { + baseConverter + } + } + + private type Converter = (SpecializedGetters, Int) => Any + + private lazy val decimalConversions = new DecimalConversion() + + private def newConverter(catalystType: DataType, avroType: Schema): Converter = { + (catalystType, avroType.getType) match { + case (NullType, NULL) => + (getter, ordinal) => null + case (BooleanType, BOOLEAN) => + (getter, ordinal) => getter.getBoolean(ordinal) + case (ByteType, INT) => + (getter, ordinal) => 
getter.getByte(ordinal).toInt + case (ShortType, INT) => + (getter, ordinal) => getter.getShort(ordinal).toInt + case (IntegerType, INT) => + (getter, ordinal) => getter.getInt(ordinal) + case (LongType, LONG) => + (getter, ordinal) => getter.getLong(ordinal) + case (FloatType, FLOAT) => + (getter, ordinal) => getter.getFloat(ordinal) + case (DoubleType, DOUBLE) => + (getter, ordinal) => getter.getDouble(ordinal) + case (d: DecimalType, FIXED) + if avroType.getLogicalType == LogicalTypes.decimal(d.precision, d.scale) => + (getter, ordinal) => + val decimal = getter.getDecimal(ordinal, d.precision, d.scale) + decimalConversions.toFixed(decimal.toJavaBigDecimal, avroType, + LogicalTypes.decimal(d.precision, d.scale)) + + case (d: DecimalType, BYTES) + if avroType.getLogicalType == LogicalTypes.decimal(d.precision, d.scale) => + (getter, ordinal) => + val decimal = getter.getDecimal(ordinal, d.precision, d.scale) + decimalConversions.toBytes(decimal.toJavaBigDecimal, avroType, + LogicalTypes.decimal(d.precision, d.scale)) + + case (StringType, ENUM) => + val enumSymbols: Set[String] = avroType.getEnumSymbols.asScala.toSet + (getter, ordinal) => + val data = getter.getUTF8String(ordinal).toString + if (!enumSymbols.contains(data)) { + throw new IncompatibleSchemaException( + "Cannot write \"" + data + "\" since it's not defined in enum \"" + + enumSymbols.mkString("\", \"") + "\"") + } + new EnumSymbol(avroType, data) + + case (StringType, STRING) => + (getter, ordinal) => new Utf8(getter.getUTF8String(ordinal).getBytes) + + case (BinaryType, FIXED) => + val size = avroType.getFixedSize() + (getter, ordinal) => + val data: Array[Byte] = getter.getBinary(ordinal) + if (data.length != size) { + throw new IncompatibleSchemaException( + s"Cannot write ${data.length} ${if (data.length > 1) "bytes" else "byte"} of " + + "binary data into FIXED Type with size of " + + s"$size ${if (size > 1) "bytes" else "byte"}") + } + new Fixed(avroType, data) + + case (BinaryType, BYTES) 
=> + (getter, ordinal) => ByteBuffer.wrap(getter.getBinary(ordinal)) + + case (DateType, INT) => + (getter, ordinal) => dateRebaseFunc(getter.getInt(ordinal)) + + case (TimestampType, LONG) => avroType.getLogicalType match { + // For backward compatibility, if the Avro type is Long and it is not logical type + // (the `null` case), output the timestamp value as with millisecond precision. + case null | _: TimestampMillis => (getter, ordinal) => + DateTimeUtils.microsToMillis(timestampRebaseFunc(getter.getLong(ordinal))) + case _: TimestampMicros => (getter, ordinal) => + timestampRebaseFunc(getter.getLong(ordinal)) + case other => throw new IncompatibleSchemaException( + s"Cannot convert Catalyst Timestamp type to Avro logical type ${other}") + } + + case (ArrayType(et, containsNull), ARRAY) => + val elementConverter = newConverter( + et, resolveNullableType(avroType.getElementType, containsNull)) + (getter, ordinal) => { + val arrayData = getter.getArray(ordinal) + val len = arrayData.numElements() + val result = new Array[Any](len) + var i = 0 + while (i < len) { + if (containsNull && arrayData.isNullAt(i)) { + result(i) = null + } else { + result(i) = elementConverter(arrayData, i) + } + i += 1 + } + // avro writer is expecting a Java Collection, so we convert it into + // `ArrayList` backed by the specified array without data copying. 
+ java.util.Arrays.asList(result: _*) + } + + case (st: StructType, RECORD) => + val structConverter = newStructConverter(st, avroType) + val numFields = st.length + (getter, ordinal) => structConverter(getter.getStruct(ordinal, numFields)) + + case (st: StructType, UNION) => + val unionConverter = newUnionConverter(st, avroType) + val numFields = st.length + (getter, ordinal) => unionConverter(getter.getStruct(ordinal, numFields)) + + case (MapType(kt, vt, valueContainsNull), MAP) if kt == StringType => + val valueConverter = newConverter( + vt, resolveNullableType(avroType.getValueType, valueContainsNull)) + (getter, ordinal) => + val mapData = getter.getMap(ordinal) + val len = mapData.numElements() + val result = new java.util.HashMap[String, Any](len) + val keyArray = mapData.keyArray() + val valueArray = mapData.valueArray() + var i = 0 + while (i < len) { + val key = keyArray.getUTF8String(i).toString + if (valueContainsNull && valueArray.isNullAt(i)) { + result.put(key, null) + } else { + result.put(key, valueConverter(valueArray, i)) + } + i += 1 + } + result + + case other => + throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystType to " + + s"Avro type $avroType.") + } + } + + private def newStructConverter(catalystStruct: StructType, avroStruct: Schema): InternalRow => Record = { + if (avroStruct.getType != RECORD || avroStruct.getFields.size() != catalystStruct.length) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " + + s"Avro type $avroStruct.") + } + val avroSchemaHelper = new AvroUtils.AvroSchemaHelper(avroStruct) + + val (avroIndices: Array[Int], fieldConverters: Array[Converter]) = + catalystStruct.map { catalystField => + val avroField = avroSchemaHelper.getFieldByName(catalystField.name) match { + case Some(f) => f + case None => throw new IncompatibleSchemaException( + s"Cannot find ${catalystField.name} in Avro schema") + } + val converter = 
newConverter(catalystField.dataType, resolveNullableType( + avroField.schema(), catalystField.nullable)) + (avroField.pos(), converter) + }.toArray.unzip + + val numFields = catalystStruct.length + row: InternalRow => + val result = new Record(avroStruct) + var i = 0 + while (i < numFields) { + if (row.isNullAt(i)) { + result.put(avroIndices(i), null) + } else { + result.put(avroIndices(i), fieldConverters(i).apply(row, i)) + } + i += 1 + } + result + } + + private def newUnionConverter(catalystStruct: StructType, avroUnion: Schema): InternalRow => Any = { + if (avroUnion.getType != UNION || !canMapUnion(catalystStruct, avroUnion)) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " + + s"Avro type $avroUnion.") + } + val nullable = avroUnion.getTypes.size() > 0 && avroUnion.getTypes.get(0).getType == Type.NULL + val avroInnerTypes = if (nullable) { + avroUnion.getTypes.asScala.tail + } else { + avroUnion.getTypes.asScala + } + val fieldConverters = catalystStruct.zip(avroInnerTypes).map { + case (f1, f2) => newConverter(f1.dataType, f2) + } + val numFields = catalystStruct.length + (row: InternalRow) => + var i = 0 + var result: Any = null + while (i < numFields) { + if (!row.isNullAt(i)) { + if (result != null) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " + + s"Avro union $avroUnion. Record has more than one optional values set") + } + result = fieldConverters(i).apply(row, i) + } + i += 1 + } + if (!nullable && result == null) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " + + s"Avro union $avroUnion. 
Record has no values set, while should have exactly one") + } + result + } + + private def canMapUnion(catalystStruct: StructType, avroStruct: Schema): Boolean = { + (avroStruct.getTypes.size() > 0 && + avroStruct.getTypes.get(0).getType == Type.NULL && + avroStruct.getTypes.size() - 1 == catalystStruct.length) || avroStruct.getTypes.size() == catalystStruct.length + } + + /** + * Resolve a possibly nullable Avro Type. + * + * An Avro type is nullable when it is a [[UNION]] of two types: one null type and another + * non-null type. This method will check the nullability of the input Avro type and return the + * non-null type within when it is nullable. Otherwise it will return the input Avro type + * unchanged. It will throw an [[UnsupportedAvroTypeException]] when the input Avro type is an + * unsupported nullable type. + * + * It will also log a warning message if the nullability for Avro and catalyst types are + * different. + */ + private def resolveNullableType(avroType: Schema, nullable: Boolean): Schema = { + val (avroNullable, resolvedAvroType) = resolveAvroType(avroType) + warnNullabilityDifference(avroNullable, nullable) + resolvedAvroType + } + + /** + * Check the nullability of the input Avro type and resolve it when it is nullable. The first + * return value is a [[Boolean]] indicating if the input Avro type is nullable. The second + * return value is the possibly resolved type. + */ + private def resolveAvroType(avroType: Schema): (Boolean, Schema) = { + if (avroType.getType == Type.UNION) { + val fields = avroType.getTypes.asScala + val actualType = fields.filter(_.getType != Type.NULL) + if (fields.length == 2 && actualType.length == 1) { + (true, actualType.head) + } else { + // This is just a normal union, not used to designate nullability + (false, avroType) + } + } else { + (false, avroType) + } + } + + /** + * log a warning message if the nullability for Avro and catalyst types are different. 
+ */ + private def warnNullabilityDifference(avroNullable: Boolean, catalystNullable: Boolean): Unit = { + if (avroNullable && !catalystNullable) { + logWarning("Writing Avro files with nullable Avro schema and non-nullable catalyst schema.") + } + if (!avroNullable && catalystNullable) { + logWarning("Writing Avro files with non-nullable Avro schema and nullable catalyst " + + "schema will throw runtime exception if there is a record with null value.") + } + } +} + +object AvroSerializer { + + // NOTE: Following methods have been renamed in Spark 3.1.3 [1] making [[AvroDeserializer]] implementation + // (which relies on it) be only compatible with the exact same version of [[DataSourceUtils]]. + // To make sure this implementation is compatible w/ all Spark versions w/in Spark 3.1.x branch, + // we're preemptively cloned those methods to make sure Hudi is compatible w/ Spark 3.1.2 as well as + // w/ Spark >= 3.1.3 + // + // [1] https://github.com/apache/spark/pull/34978 + + def createDateRebaseFuncInWrite(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Int => Int = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => days: Int => + if (days < RebaseDateTime.lastSwitchGregorianDay) { + throw DataSourceUtils.newRebaseExceptionInWrite(format) + } + days + case LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseGregorianToJulianDays + case LegacyBehaviorPolicy.CORRECTED => identity[Int] + } + + def createTimestampRebaseFuncInWrite(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Long => Long = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => micros: Long => + if (micros < RebaseDateTime.lastSwitchGregorianTs) { + throw DataSourceUtils.newRebaseExceptionInWrite(format) + } + micros + case LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseGregorianToJulianMicros + case LegacyBehaviorPolicy.CORRECTED => identity[Long] + } +} diff --git 
a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala new file mode 100644 index 0000000000000..54eacbaa0a182 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.internal.Logging +import org.apache.spark.sql.internal.SQLConf + +import java.util.Locale +import scala.collection.JavaConverters._ + +/** + * NOTE: This code is borrowed from Spark 3.1.3 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +private[avro] object AvroUtils extends Logging { + + /** + * Wraps an Avro Schema object so that field lookups are faster. + * + * @param avroSchema The schema in which to search for fields. Must be of type RECORD. 
+ */ + class AvroSchemaHelper(avroSchema: Schema) { + if (avroSchema.getType != Schema.Type.RECORD) { + throw new IncompatibleSchemaException( + s"Attempting to treat ${avroSchema.getName} as a RECORD, but it was: ${avroSchema.getType}") + } + + private[this] val fieldMap = avroSchema.getFields.asScala + .groupBy(_.name.toLowerCase(Locale.ROOT)) + .mapValues(_.toSeq) // toSeq needed for scala 2.13 + + /** + * Extract a single field from the contained avro schema which has the desired field name, + * performing the matching with proper case sensitivity according to SQLConf.resolver. + * + * @param name The name of the field to search for. + * @return `Some(match)` if a matching Avro field is found, otherwise `None`. + */ + def getFieldByName(name: String): Option[Schema.Field] = { + + // get candidates, ignoring case of field name + val candidates = fieldMap.get(name.toLowerCase(Locale.ROOT)) + .getOrElse(Seq.empty[Schema.Field]) + + // search candidates, taking into account case sensitivity settings + candidates.filter(f => SQLConf.get.resolver(f.name(), name)) match { + case Seq(avroField) => Some(avroField) + case Seq() => None + case matches => throw new IncompatibleSchemaException( + s"Searching for '$name' in Avro schema gave ${matches.size} matches. Candidates: " + + matches.map(_.name()).mkString("[", ", ", "]") + ) + } + } + } + + /** + * Extract a single field from `avroSchema` which has the desired field name, + * performing the matching with proper case sensitivity according to [[SQLConf.resolver]]. + * + * @param avroSchema The schema in which to search for the field. Must be of type RECORD. + * @param name The name of the field to search for. + * @return `Some(match)` if a matching Avro field is found, otherwise `None`. 
+ * @throws IncompatibleSchemaException if `avroSchema` is not a RECORD or contains multiple + * fields matching `name` (i.e., case-insensitive matching + * is used and `avroSchema` has two or more fields that have + * the same name with difference case). + */ + private[avro] def getAvroFieldByName( + avroSchema: Schema, + name: String): Option[Schema.Field] = { + if (avroSchema.getType != Schema.Type.RECORD) { + throw new IncompatibleSchemaException( + s"Attempting to treat ${avroSchema.getName} as a RECORD, but it was: ${avroSchema.getType}") + } + avroSchema.getFields.asScala.filter(f => SQLConf.get.resolver(f.name(), name)).toSeq match { + case Seq(avroField) => Some(avroField) + case Seq() => None + case matches => throw new IncompatibleSchemaException( + s"Searching for '$name' in Avro schema gave ${matches.size} matches. Candidates: " + + matches.map(_.name()).mkString("[", ", ", "]") + ) + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_1AvroDeserializer.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_1AvroDeserializer.scala new file mode 100644 index 0000000000000..bf6fcbee7e9c1 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_1AvroDeserializer.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.sql.types.DataType + +class HoodieSpark3_1AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) + extends HoodieAvroDeserializer { + + private val avroDeserializer = new AvroDeserializer(rootAvroType, rootCatalystType) + + def deserialize(data: Any): Option[Any] = avroDeserializer.deserialize(data) +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_1AvroSerializer.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_1AvroSerializer.scala new file mode 100644 index 0000000000000..942a4e1b3b599 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_1AvroSerializer.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.sql.types.DataType + +class HoodieSpark3_1AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) + extends HoodieAvroSerializer { + + val avroSerializer = new AvroSerializer(rootCatalystType, rootAvroType, nullable) + + override def serialize(catalystData: Any): Any = avroSerializer.serialize(catalystData) +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/HStatments.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/HStatments.scala new file mode 100644 index 0000000000000..f78891fa61f5d --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/HStatments.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition +import org.apache.spark.sql.types.DataType + +/** + * ALTER TABLE ... ADD COLUMNS command, as parsed from SQL. + */ +case class HoodieAlterTableAddColumnsStatement( + tableName: Seq[String], + columnsToAdd: Seq[QualifiedColType]) extends ParsedStatement + +/** + * ALTER TABLE ... CHANGE COLUMN command, as parsed from SQL. + */ +case class HoodieAlterTableAlterColumnStatement( + tableName: Seq[String], + column: Seq[String], + dataType: Option[DataType], + nullable: Option[Boolean], + comment: Option[String], + position: Option[ColumnPosition]) extends ParsedStatement + + +/** + * ALTER TABLE ... RENAME COLUMN command, as parsed from SQL. + */ +case class HoodieAlterTableRenameColumnStatement( + tableName: Seq[String], + column: Seq[String], + newName: String) extends ParsedStatement + +/** + * ALTER TABLE ... DROP COLUMNS command, as parsed from SQL. + */ +case class HoodieAlterTableDropColumnsStatement( + tableName: Seq[String], columnsToDrop: Seq[Seq[String]]) extends ParsedStatement + +/** + * ALTER TABLE ... SET TBLPROPERTIES command, as parsed from SQL. + */ +case class HoodieAlterTableSetPropertiesStatement( + tableName: Seq[String], properties: Map[String, String]) extends ParsedStatement + +/** + * ALTER TABLE ... UNSET TBLPROPERTIES command, as parsed from SQL. 
+ */ +case class HoodieAlterTableUnsetPropertiesStatement( + tableName: Seq[String], propertyKeys: Seq[String], ifExists: Boolean) extends ParsedStatement diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark312HoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark312HoodieParquetFileFormat.scala new file mode 100644 index 0000000000000..83b3162bbc328 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark312HoodieParquetFileFormat.scala @@ -0,0 +1,365 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.parquet + +import java.net.URI +import java.util +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl +import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType} +import org.apache.hudi.client.utils.SparkInternalSchemaConverter +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.HoodieSparkUtils +import org.apache.hudi.common.util.InternalSchemaCache +import org.apache.hudi.common.util.collection.Pair +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper} +import org.apache.hudi.internal.schema.action.InternalSchemaMerger +import org.apache.parquet.filter2.compat.FilterCompat +import org.apache.parquet.filter2.predicate.FilterApi +import org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GROUPS +import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetRecordReader} + +import org.apache.spark.TaskContext +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Cast, JoinedRow} +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.datasources.{DataSourceUtils, PartitionedFile, RecordReaderIterator} +import org.apache.spark.sql.execution.datasources.parquet._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources._ +import org.apache.spark.sql.types.{AtomicType, DataType, StructField, StructType} +import org.apache.spark.util.SerializableConfiguration + +class Spark312HoodieParquetFileFormat extends ParquetFileFormat { + + // reference ParquetFileFormat from spark project + override def buildReaderWithPartitionValues( + sparkSession: 
SparkSession, + dataSchema: StructType, + partitionSchema: StructType, + requiredSchema: StructType, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { + if (hadoopConf.get(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA, "").isEmpty) { + // fallback to origin parquet File read + super.buildReaderWithPartitionValues(sparkSession, dataSchema, partitionSchema, requiredSchema, filters, options, hadoopConf) + } else { + hadoopConf.set(ParquetInputFormat.READ_SUPPORT_CLASS, classOf[ParquetReadSupport].getName) + hadoopConf.set( + ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA, + requiredSchema.json) + hadoopConf.set( + ParquetWriteSupport.SPARK_ROW_SCHEMA, + requiredSchema.json) + hadoopConf.set( + SQLConf.SESSION_LOCAL_TIMEZONE.key, + sparkSession.sessionState.conf.sessionLocalTimeZone) + hadoopConf.setBoolean( + SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key, + sparkSession.sessionState.conf.nestedSchemaPruningEnabled) + hadoopConf.setBoolean( + SQLConf.CASE_SENSITIVE.key, + sparkSession.sessionState.conf.caseSensitiveAnalysis) + + ParquetWriteSupport.setSchema(requiredSchema, hadoopConf) + + // Sets flags for `ParquetToSparkSchemaConverter` + hadoopConf.setBoolean( + SQLConf.PARQUET_BINARY_AS_STRING.key, + sparkSession.sessionState.conf.isParquetBinaryAsString) + hadoopConf.setBoolean( + SQLConf.PARQUET_INT96_AS_TIMESTAMP.key, + sparkSession.sessionState.conf.isParquetINT96AsTimestamp) + // for dataSource v1, we have no method to do project for spark physical plan. + // it's safe to do cols project here. 
+ val internalSchemaString = hadoopConf.get(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA) + val querySchemaOption = SerDeHelper.fromJson(internalSchemaString) + if (querySchemaOption.isPresent && !requiredSchema.isEmpty) { + val prunedSchema = SparkInternalSchemaConverter.convertAndPruneStructTypeToInternalSchema(requiredSchema, querySchemaOption.get()) + hadoopConf.set(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA, SerDeHelper.toJson(prunedSchema)) + } + val broadcastedHadoopConf = + sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) + + // TODO: if you move this into the closure it reverts to the default values. + // If true, enable using the custom RecordReader for parquet. This only works for + // a subset of the types (no complex types). + val resultSchema = StructType(partitionSchema.fields ++ requiredSchema.fields) + val sqlConf = sparkSession.sessionState.conf + val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled + val enableVectorizedReader: Boolean = + sqlConf.parquetVectorizedReaderEnabled && + resultSchema.forall(_.dataType.isInstanceOf[AtomicType]) + val enableRecordFilter: Boolean = sqlConf.parquetRecordFilterEnabled + val timestampConversion: Boolean = sqlConf.isParquetINT96TimestampConversion + val capacity = sqlConf.parquetVectorizedReaderBatchSize + val enableParquetFilterPushDown: Boolean = sqlConf.parquetFilterPushDown + // Whole stage codegen (PhysicalRDD) is able to deal with batches directly + val returningBatch = supportBatch(sparkSession, resultSchema) + val pushDownDate = sqlConf.parquetFilterPushDownDate + val pushDownTimestamp = sqlConf.parquetFilterPushDownTimestamp + val pushDownDecimal = sqlConf.parquetFilterPushDownDecimal + val pushDownStringStartWith = sqlConf.parquetFilterPushDownStringStartWith + val pushDownInFilterThreshold = sqlConf.parquetFilterPushDownInFilterThreshold + val isCaseSensitive = sqlConf.caseSensitiveAnalysis + + (file: PartitionedFile) => { + 
assert(file.partitionValues.numFields == partitionSchema.size) + val filePath = new Path(new URI(file.filePath)) + val split = + new org.apache.parquet.hadoop.ParquetInputSplit( + filePath, + file.start, + file.start + file.length, + file.length, + Array.empty, + null) + val sharedConf = broadcastedHadoopConf.value.value + // do deal with internalSchema + val internalSchemaString = sharedConf.get(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA) + // querySchema must be a pruned schema. + val querySchemaOption = SerDeHelper.fromJson(internalSchemaString) + val internalSchemaChangeEnabled = if (internalSchemaString.isEmpty || !querySchemaOption.isPresent) false else true + val tablePath = sharedConf.get(SparkInternalSchemaConverter.HOODIE_TABLE_PATH) + val commitInstantTime = FSUtils.getCommitTime(filePath.getName).toLong; + val fileSchema = if (internalSchemaChangeEnabled) { + val validCommits = sharedConf.get(SparkInternalSchemaConverter.HOODIE_VALID_COMMITS_LIST) + InternalSchemaCache.getInternalSchemaByVersionId(commitInstantTime, tablePath, sharedConf, if (validCommits == null) "" else validCommits) + } else { + // this should not happened, searchSchemaAndCache will deal with correctly. + null + } + + lazy val footerFileMetaData = + ParquetFileReader.readFooter(sharedConf, filePath, SKIP_ROW_GROUPS).getFileMetaData + val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode( + footerFileMetaData.getKeyValueMetaData.get, + SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ)) + // Try to push down filters when filter push-down is enabled. 
+ val pushed = if (enableParquetFilterPushDown) { + val parquetSchema = footerFileMetaData.getSchema + val parquetFilters = if (HoodieSparkUtils.gteqSpark3_1_3) { + Spark312HoodieParquetFileFormat.createParquetFilters( + parquetSchema, + pushDownDate, + pushDownTimestamp, + pushDownDecimal, + pushDownStringStartWith, + pushDownInFilterThreshold, + isCaseSensitive, + datetimeRebaseMode) + } else { + Spark312HoodieParquetFileFormat.createParquetFilters( + parquetSchema, + pushDownDate, + pushDownTimestamp, + pushDownDecimal, + pushDownStringStartWith, + pushDownInFilterThreshold, + isCaseSensitive) + } + filters.map(Spark312HoodieParquetFileFormat.rebuildFilterFromParquet(_, fileSchema, querySchemaOption.get())) + // Collects all converted Parquet filter predicates. Notice that not all predicates can be + // converted (`ParquetFilters.createFilter` returns an `Option`). That's why a `flatMap` + // is used here. + .flatMap(parquetFilters.createFilter(_)) + .reduceOption(FilterApi.and) + } else { + None + } + + // PARQUET_INT96_TIMESTAMP_CONVERSION says to apply timezone conversions to int96 timestamps' + // *only* if the file was created by something other than "parquet-mr", so check the actual + // writer here for this file. We have to do this per-file, as each file in the table may + // have different writers. + // Define isCreatedByParquetMr as function to avoid unnecessary parquet footer reads. 
+ def isCreatedByParquetMr: Boolean = + footerFileMetaData.getCreatedBy().startsWith("parquet-mr") + + val convertTz = + if (timestampConversion && !isCreatedByParquetMr) { + Some(DateTimeUtils.getZoneId(sharedConf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key))) + } else { + None + } + val int96RebaseMode = DataSourceUtils.int96RebaseMode( + footerFileMetaData.getKeyValueMetaData.get, + SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ)) + + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) + // use new conf + val hadoopAttempConf = new Configuration(broadcastedHadoopConf.value.value) + // + // reset request schema + var typeChangeInfos: java.util.Map[Integer, Pair[DataType, DataType]] = new java.util.HashMap() + if (internalSchemaChangeEnabled) { + val mergedInternalSchema = new InternalSchemaMerger(fileSchema, querySchemaOption.get(), true, true).mergeSchema() + val mergedSchema = SparkInternalSchemaConverter.constructSparkSchemaFromInternalSchema(mergedInternalSchema) + typeChangeInfos = SparkInternalSchemaConverter.collectTypeChangedCols(querySchemaOption.get(), mergedInternalSchema) + hadoopAttempConf.set(ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA, mergedSchema.json) + } + val hadoopAttemptContext = + new TaskAttemptContextImpl(hadoopAttempConf, attemptId) + + // Try to push down filters when filter push-down is enabled. + // Notice: This push-down is RowGroups level, not individual records. 
+ if (pushed.isDefined) { + ParquetInputFormat.setFilterPredicate(hadoopAttemptContext.getConfiguration, pushed.get) + } + val taskContext = Option(TaskContext.get()) + if (enableVectorizedReader) { + val vectorizedReader = new Spark312HoodieVectorizedParquetRecordReader( + convertTz.orNull, + datetimeRebaseMode.toString, + int96RebaseMode.toString, + enableOffHeapColumnVector && taskContext.isDefined, + capacity, typeChangeInfos) + val iter = new RecordReaderIterator(vectorizedReader) + // SPARK-23457 Register a task completion listener before `initialization`. + taskContext.foreach(_.addTaskCompletionListener[Unit](_ => iter.close())) + vectorizedReader.initialize(split, hadoopAttemptContext) + logDebug(s"Appending $partitionSchema ${file.partitionValues}") + vectorizedReader.initBatch(partitionSchema, file.partitionValues) + if (returningBatch) { + vectorizedReader.enableReturningBatches() + } + + // UnsafeRowParquetRecordReader appends the columns internally to avoid another copy. + iter.asInstanceOf[Iterator[InternalRow]] + } else { + logDebug(s"Falling back to parquet-mr") + // ParquetRecordReader returns InternalRow + val readSupport = new ParquetReadSupport( + convertTz, + enableVectorizedReader = false, + datetimeRebaseMode, + int96RebaseMode) + val reader = if (pushed.isDefined && enableRecordFilter) { + val parquetFilter = FilterCompat.get(pushed.get, null) + new ParquetRecordReader[InternalRow](readSupport, parquetFilter) + } else { + new ParquetRecordReader[InternalRow](readSupport) + } + val iter = new RecordReaderIterator[InternalRow](reader) + // SPARK-23457 Register a task completion listener before `initialization`. 
+ taskContext.foreach(_.addTaskCompletionListener[Unit](_ => iter.close())) + reader.initialize(split, hadoopAttemptContext) + + val fullSchema = requiredSchema.toAttributes ++ partitionSchema.toAttributes + val unsafeProjection = if (typeChangeInfos.isEmpty) { + GenerateUnsafeProjection.generate(fullSchema, fullSchema) + } else { + // find type changed. + val newFullSchema = new StructType(requiredSchema.fields.zipWithIndex.map { case (f, i) => + if (typeChangeInfos.containsKey(i)) { + StructField(f.name, typeChangeInfos.get(i).getRight, f.nullable, f.metadata) + } else f + }).toAttributes ++ partitionSchema.toAttributes + val castSchema = newFullSchema.zipWithIndex.map { case (attr, i) => + if (typeChangeInfos.containsKey(i)) { + Cast(attr, typeChangeInfos.get(i).getLeft) + } else attr + } + GenerateUnsafeProjection.generate(castSchema, newFullSchema) + } + + if (partitionSchema.length == 0) { + // There is no partition columns + iter.map(unsafeProjection) + } else { + val joinedRow = new JoinedRow() + iter.map(d => unsafeProjection(joinedRow(d, file.partitionValues))) + } + } + } + } + } +} + +object Spark312HoodieParquetFileFormat { + + val PARQUET_FILTERS_CLASS_NAME = "org.apache.spark.sql.execution.datasources.parquet.ParquetFilters" + + private def createParquetFilters(arg: Any*): ParquetFilters = { + val clazz = Class.forName(PARQUET_FILTERS_CLASS_NAME, true, Thread.currentThread().getContextClassLoader) + val ctor = clazz.getConstructors.head + ctor.newInstance(arg.map(_.asInstanceOf[AnyRef]): _*).asInstanceOf[ParquetFilters] + } + + private def rebuildFilterFromParquet(oldFilter: Filter, fileSchema: InternalSchema, querySchema: InternalSchema): Filter = { + if (fileSchema == null || querySchema == null) { + oldFilter + } else { + oldFilter match { + case eq: EqualTo => + val newAttribute = InternalSchemaUtils.reBuildFilterName(eq.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else eq.copy(attribute = newAttribute) + case eqs: 
EqualNullSafe => + val newAttribute = InternalSchemaUtils.reBuildFilterName(eqs.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else eqs.copy(attribute = newAttribute) + case gt: GreaterThan => + val newAttribute = InternalSchemaUtils.reBuildFilterName(gt.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else gt.copy(attribute = newAttribute) + case gtr: GreaterThanOrEqual => + val newAttribute = InternalSchemaUtils.reBuildFilterName(gtr.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else gtr.copy(attribute = newAttribute) + case lt: LessThan => + val newAttribute = InternalSchemaUtils.reBuildFilterName(lt.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else lt.copy(attribute = newAttribute) + case lte: LessThanOrEqual => + val newAttribute = InternalSchemaUtils.reBuildFilterName(lte.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else lte.copy(attribute = newAttribute) + case i: In => + val newAttribute = InternalSchemaUtils.reBuildFilterName(i.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else i.copy(attribute = newAttribute) + case isn: IsNull => + val newAttribute = InternalSchemaUtils.reBuildFilterName(isn.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else isn.copy(attribute = newAttribute) + case isnn: IsNotNull => + val newAttribute = InternalSchemaUtils.reBuildFilterName(isnn.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else isnn.copy(attribute = newAttribute) + case And(left, right) => + And(rebuildFilterFromParquet(left, fileSchema, querySchema), rebuildFilterFromParquet(right, fileSchema, querySchema)) + case Or(left, right) => + Or(rebuildFilterFromParquet(left, fileSchema, querySchema), rebuildFilterFromParquet(right, fileSchema, querySchema)) + case Not(child) => + Not(rebuildFilterFromParquet(child, fileSchema, querySchema)) + case 
ssw: StringStartsWith => + val newAttribute = InternalSchemaUtils.reBuildFilterName(ssw.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else ssw.copy(attribute = newAttribute) + case ses: StringEndsWith => + val newAttribute = InternalSchemaUtils.reBuildFilterName(ses.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else ses.copy(attribute = newAttribute) + case sc: StringContains => + val newAttribute = InternalSchemaUtils.reBuildFilterName(sc.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else sc.copy(attribute = newAttribute) + case AlwaysTrue => + AlwaysTrue + case AlwaysFalse => + AlwaysFalse + case _ => + AlwaysTrue + } + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/ResolveHudiAlterTableCommand312.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/ResolveHudiAlterTableCommand312.scala new file mode 100644 index 0000000000000..522cecdaaf07f --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/ResolveHudiAlterTableCommand312.scala @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.hudi + +import java.util.Locale +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.internal.schema.action.TableChange.ColumnChangeID +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, TableChange} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.CatalogV2Util.failNullType +import org.apache.spark.sql.connector.catalog.TableChange._ +import org.apache.spark.sql.hudi.command.AlterTableCommand312 +import org.apache.spark.sql.types.StructType + +import scala.collection.mutable + +/** + * Rule to mostly resolve, normalize and rewrite column names based on case sensitivity + * for alter table column commands. + * TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.1x + */ +case class ResolveHudiAlterTableCommand312(sparkSession: SparkSession) extends Rule[LogicalPlan] { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { + case add @ HoodieAlterTableAddColumnsStatement(asTable(table), cols) => + if (isHoodieTable(table) && schemaEvolutionEnabled){ + cols.foreach(c => CatalogV2Util.failNullType(c.dataType)) + val changes = cols.map { col => + TableChange.addColumn( + col.name.toArray, + col.dataType, + col.nullable, + col.comment.orNull, + col.position.orNull) + } + val newChanges = normalizeChanges(changes, table.schema) + AlterTableCommand312(table, newChanges, ColumnChangeID.ADD) + } else { + // throw back to spark + AlterTableAddColumnsStatement(add.tableName, add.columnsToAdd) + } + case a @ HoodieAlterTableAlterColumnStatement(asTable(table), _, _, _, _, _) => + if (isHoodieTable(table) && 
schemaEvolutionEnabled){ + a.dataType.foreach(failNullType) + val colName = a.column.toArray + val typeChange = a.dataType.map { newDataType => + TableChange.updateColumnType(colName, newDataType) + } + val nullabilityChange = a.nullable.map { nullable => + TableChange.updateColumnNullability(colName, nullable) + } + val commentChange = a.comment.map { newComment => + TableChange.updateColumnComment(colName, newComment) + } + val positionChange = a.position.map { newPosition => + TableChange.updateColumnPosition(colName, newPosition) + } + AlterTableCommand312(table, normalizeChanges(typeChange.toSeq ++ nullabilityChange ++ commentChange ++ positionChange, table.schema), ColumnChangeID.UPDATE) + } else { + // throw back to spark + AlterTableAlterColumnStatement(a.tableName, a.column, a.dataType, a.nullable, a.comment, a.position) + } + case rename @ HoodieAlterTableRenameColumnStatement(asTable(table), col, newName) => + if (isHoodieTable(table) && schemaEvolutionEnabled){ + val changes = Seq(TableChange.renameColumn(col.toArray, newName)) + AlterTableCommand312(table, normalizeChanges(changes, table.schema), ColumnChangeID.UPDATE) + } else { + // throw back to spark + AlterTableRenameColumnStatement(rename.tableName, rename.column, rename.newName) + } + case drop @ HoodieAlterTableDropColumnsStatement(asTable(table), cols) => + if (isHoodieTable(table) && schemaEvolutionEnabled) { + val changes = cols.map(col => TableChange.deleteColumn(col.toArray)) + AlterTableCommand312(table, normalizeChanges(changes, table.schema), ColumnChangeID.DELETE) + } else { + // throw back to spark + AlterTableDropColumnsStatement(drop.tableName, drop.columnsToDrop) + } + case set @ HoodieAlterTableSetPropertiesStatement(asTable(table), props) => + if (isHoodieTable(table) && schemaEvolutionEnabled) { + val changes = props.map { case (key, value) => + TableChange.setProperty(key, value) + }.toSeq + AlterTableCommand312(table, normalizeChanges(changes, table.schema), 
ColumnChangeID.PROPERTY_CHANGE) + } else { + // throw back to spark + AlterTableSetPropertiesStatement(set.tableName, set.properties) + } + case unset @ HoodieAlterTableUnsetPropertiesStatement(asTable(table), keys, _) => + if (isHoodieTable(table) && schemaEvolutionEnabled) { + val changes = keys.map(key => TableChange.removeProperty(key)) + AlterTableCommand312(table, normalizeChanges(changes, table.schema), ColumnChangeID.PROPERTY_CHANGE) + } else { + // throw back to spark + AlterTableUnsetPropertiesStatement(unset.tableName, unset.propertyKeys, unset.ifExists) + } + } + + private def schemaEvolutionEnabled(): Boolean = sparkSession + .sessionState.conf.getConfString(HoodieWriteConfig.SCHEMA_EVOLUTION_ENABLE.key(), "false").toBoolean + + private def isHoodieTable(table: CatalogTable): Boolean = table.provider.map(_.toLowerCase(Locale.ROOT)).orNull == "hudi" + + def normalizeChanges(changes: Seq[TableChange], schema: StructType): Seq[TableChange] = { + val colsToAdd = mutable.Map.empty[Seq[String], Seq[String]] + changes.flatMap { + case add: AddColumn => + def addColumn(parentSchema: StructType, parentName: String, normalizedParentName: Seq[String]): TableChange = { + val fieldsAdded = colsToAdd.getOrElse(normalizedParentName, Nil) + val pos = findColumnPosition(add.position(), parentName, parentSchema, fieldsAdded) + val field = add.fieldNames().last + colsToAdd(normalizedParentName) = fieldsAdded :+ field + TableChange.addColumn( + (normalizedParentName :+ field).toArray, + add.dataType(), + add.isNullable, + add.comment, + pos) + } + val parent = add.fieldNames().init + if (parent.nonEmpty) { + // Adding a nested field, need to normalize the parent column and position + val target = schema.findNestedField(parent, includeCollections = true, conf.resolver) + if (target.isEmpty) { + // Leave unresolved. 
Throws error in CheckAnalysis + Some(add) + } else { + val (normalizedName, sf) = target.get + sf.dataType match { + case struct: StructType => + Some(addColumn(struct, parent.quoted, normalizedName :+ sf.name)) + case other => + Some(add) + } + } + } else { + // Adding to the root. Just need to normalize position + Some(addColumn(schema, "root", Nil)) + } + + case typeChange: UpdateColumnType => + // Hive style syntax provides the column type, even if it may not have changed + val fieldOpt = schema.findNestedField( + typeChange.fieldNames(), includeCollections = true, conf.resolver) + + if (fieldOpt.isEmpty) { + // We couldn't resolve the field. Leave it to CheckAnalysis + Some(typeChange) + } else { + val (fieldNames, field) = fieldOpt.get + if (field.dataType == typeChange.newDataType()) { + // The user didn't want the field to change, so remove this change + None + } else { + Some(TableChange.updateColumnType( + (fieldNames :+ field.name).toArray, typeChange.newDataType())) + } + } + case n: UpdateColumnNullability => + // Need to resolve column + resolveFieldNames( + schema, + n.fieldNames(), + TableChange.updateColumnNullability(_, n.nullable())).orElse(Some(n)) + + case position: UpdateColumnPosition => + position.position() match { + case after: After => + // Need to resolve column as well as position reference + val fieldOpt = schema.findNestedField( + position.fieldNames(), includeCollections = true, conf.resolver) + + if (fieldOpt.isEmpty) { + Some(position) + } else { + val (normalizedPath, field) = fieldOpt.get + val targetCol = schema.findNestedField( + normalizedPath :+ after.column(), includeCollections = true, conf.resolver) + if (targetCol.isEmpty) { + // Leave unchanged to CheckAnalysis + Some(position) + } else { + Some(TableChange.updateColumnPosition( + (normalizedPath :+ field.name).toArray, + ColumnPosition.after(targetCol.get._2.name))) + } + } + case _ => + // Need to resolve column + resolveFieldNames( + schema, + position.fieldNames(), + 
TableChange.updateColumnPosition(_, position.position())).orElse(Some(position)) + } + + case comment: UpdateColumnComment => + resolveFieldNames( + schema, + comment.fieldNames(), + TableChange.updateColumnComment(_, comment.newComment())).orElse(Some(comment)) + + case rename: RenameColumn => + resolveFieldNames( + schema, + rename.fieldNames(), + TableChange.renameColumn(_, rename.newName())).orElse(Some(rename)) + + case delete: DeleteColumn => + resolveFieldNames(schema, delete.fieldNames(), TableChange.deleteColumn) + .orElse(Some(delete)) + + case column: ColumnChange => + // This is informational for future developers + throw new UnsupportedOperationException( + "Please add an implementation for a column change here") + case other => Some(other) + } + } + + /** + * Returns the table change if the field can be resolved, returns None if the column is not + * found. An error will be thrown in CheckAnalysis for columns that can't be resolved. + */ + private def resolveFieldNames( + schema: StructType, + fieldNames: Array[String], + copy: Array[String] => TableChange): Option[TableChange] = { + val fieldOpt = schema.findNestedField( + fieldNames, includeCollections = true, conf.resolver) + fieldOpt.map { case (path, field) => copy((path :+ field.name).toArray) } + } + + private def findColumnPosition( + position: ColumnPosition, + parentName: String, + struct: StructType, + fieldsAdded: Seq[String]): ColumnPosition = { + position match { + case null => null + case after: After => + (struct.fieldNames ++ fieldsAdded).find(n => conf.resolver(n, after.column())) match { + case Some(colName) => + ColumnPosition.after(colName) + case None => + throw new AnalysisException("Couldn't find the reference column for " + + s"$after at $parentName") + } + case other => other + } + } + + object asTable { + def unapply(parts: Seq[String]): Option[CatalogTable] = { + val identifier = parts match { + case Seq(tblName) => TableIdentifier(tblName) + case Seq(dbName, tblName) => 
TableIdentifier(tblName, Some(dbName)) + case _ => + throw new AnalysisException( + s"${parts} is not a valid TableIdentifier as it has more than 2 name parts.") + } + Some(sparkSession.sessionState.catalog.getTableMetadata(identifier)) + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand312.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand312.scala new file mode 100644 index 0000000000000..5d78018f02465 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand312.scala @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hudi.command + +import java.net.URI +import java.nio.charset.StandardCharsets +import java.util +import java.util.concurrent.atomic.AtomicInteger +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.client.utils.SparkInternalSchemaConverter +import org.apache.hudi.common.model.{HoodieCommitMetadata, WriteOperationType} +import org.apache.hudi.{AvroConversionUtils, DataSourceOptionsHelper, DataSourceUtils} +import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstant} +import org.apache.hudi.common.table.timeline.HoodieInstant.State +import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hudi.common.util.{CommitUtils, Option} +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.action.TableChange.ColumnChangeID +import org.apache.hudi.internal.schema.action.TableChanges +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter +import org.apache.hudi.internal.schema.utils.{SchemaChangeUtils, SerDeHelper} +import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager +import org.apache.hudi.table.HoodieSparkTable +import org.apache.spark.api.java.JavaSparkContext +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.connector.catalog.{TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, DeleteColumn, RemoveProperty, SetProperty} +import org.apache.spark.sql.execution.command.RunnableCommand +import org.apache.spark.sql.types.StructType + +import scala.collection.JavaConverters._ +import scala.util.control.NonFatal + +// TODO: we should remove this file when 
we support datasourceV2 for hoodie on spark3.1x +case class AlterTableCommand312(table: CatalogTable, changes: Seq[TableChange], changeType: ColumnChangeID) extends RunnableCommand with Logging { + override def run(sparkSession: SparkSession): Seq[Row] = { + changeType match { + case ColumnChangeID.ADD => applyAddAction(sparkSession) + case ColumnChangeID.DELETE => applyDeleteAction(sparkSession) + case ColumnChangeID.UPDATE => applyUpdateAction(sparkSession) + case ColumnChangeID.PROPERTY_CHANGE if (changes.filter(_.isInstanceOf[SetProperty]).size == changes.size) => + applyPropertySet(sparkSession) + case ColumnChangeID.PROPERTY_CHANGE if (changes.filter(_.isInstanceOf[RemoveProperty]).size == changes.size) => + applyPropertyUnset(sparkSession) + case other => throw new RuntimeException(s"find unsupported alter command type: ${other}") + } + Seq.empty[Row] + } + + def applyAddAction(sparkSession: SparkSession): Unit = { + val (oldSchema, historySchema) = getInternalSchemaAndHistorySchemaStr(sparkSession) + val addChange = TableChanges.ColumnAddChange.get(oldSchema) + changes.map(_.asInstanceOf[AddColumn]).foreach { addColumn => + val names = addColumn.fieldNames() + val parentName = AlterTableCommand312.getParentName(names) + // add col change + val colType = SparkInternalSchemaConverter.buildTypeFromStructType(addColumn.dataType(), true, new AtomicInteger(0)) + addChange.addColumns(parentName, names.last, colType, addColumn.comment()) + // add position change + addColumn.position() match { + case after: TableChange.After => + addChange.addPositionChange(names.mkString("."), + if (parentName.isEmpty) after.column() else parentName + "." 
+ after.column(), "after") + case _: TableChange.First => + addChange.addPositionChange(names.mkString("."), "", "first") + case _ => + } + } + val newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, addChange) + val verifiedHistorySchema = if (historySchema == null || historySchema.isEmpty) { + SerDeHelper.inheritSchemas(oldSchema, "") + } else { + historySchema + } + AlterTableCommand312.commitWithSchema(newSchema, verifiedHistorySchema, table, sparkSession) + logInfo("column add finished") + } + + def applyDeleteAction(sparkSession: SparkSession): Unit = { + val (oldSchema, historySchema) = getInternalSchemaAndHistorySchemaStr(sparkSession) + val deleteChange = TableChanges.ColumnDeleteChange.get(oldSchema) + changes.map(_.asInstanceOf[DeleteColumn]).foreach { c => + val originalColName = c.fieldNames().mkString("."); + AlterTableCommand312.checkSchemaChange(Seq(originalColName), table) + deleteChange.deleteColumn(originalColName) + } + val newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, deleteChange) + // delete action should not change the getMaxColumnId field. 
+ newSchema.setMaxColumnId(oldSchema.getMaxColumnId) + val verifiedHistorySchema = if (historySchema == null || historySchema.isEmpty) { + SerDeHelper.inheritSchemas(oldSchema, "") + } else { + historySchema + } + AlterTableCommand312.commitWithSchema(newSchema, verifiedHistorySchema, table, sparkSession) + logInfo("column delete finished") + } + + def applyUpdateAction(sparkSession: SparkSession): Unit = { + val (oldSchema, historySchema) = getInternalSchemaAndHistorySchemaStr(sparkSession) + val updateChange = TableChanges.ColumnUpdateChange.get(oldSchema) + changes.foreach { change => + change match { + case updateType: TableChange.UpdateColumnType => + val newType = SparkInternalSchemaConverter.buildTypeFromStructType(updateType.newDataType(), true, new AtomicInteger(0)) + updateChange.updateColumnType(updateType.fieldNames().mkString("."), newType) + case updateComment: TableChange.UpdateColumnComment => + updateChange.updateColumnComment(updateComment.fieldNames().mkString("."), updateComment.newComment()) + case updateName: TableChange.RenameColumn => + val originalColName = updateName.fieldNames().mkString(".") + AlterTableCommand312.checkSchemaChange(Seq(originalColName), table) + updateChange.renameColumn(originalColName, updateName.newName()) + case updateNullAbility: TableChange.UpdateColumnNullability => + updateChange.updateColumnNullability(updateNullAbility.fieldNames().mkString("."), updateNullAbility.nullable()) + case updatePosition: TableChange.UpdateColumnPosition => + val names = updatePosition.fieldNames() + val parentName = AlterTableCommand312.getParentName(names) + updatePosition.position() match { + case after: TableChange.After => + updateChange.addPositionChange(names.mkString("."), + if (parentName.isEmpty) after.column() else parentName + "." 
+ after.column(), "after") + case _: TableChange.First => + updateChange.addPositionChange(names.mkString("."), "", "first") + case _ => + } + } + } + val newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, updateChange) + val verifiedHistorySchema = if (historySchema == null || historySchema.isEmpty) { + SerDeHelper.inheritSchemas(oldSchema, "") + } else { + historySchema + } + AlterTableCommand312.commitWithSchema(newSchema, verifiedHistorySchema, table, sparkSession) + logInfo("column update finished") + } + + // to do support unset default value to columns, and apply them to internalSchema + def applyPropertyUnset(sparkSession: SparkSession): Unit = { + val catalog = sparkSession.sessionState.catalog + val propKeys = changes.map(_.asInstanceOf[RemoveProperty]).map(_.property()) + // ignore NonExist unset + propKeys.foreach { k => + if (!table.properties.contains(k) && k != TableCatalog.PROP_COMMENT) { + logWarning(s"find non exist unset property: ${k} , ignore it") + } + } + val tableComment = if (propKeys.contains(TableCatalog.PROP_COMMENT)) None else table.comment + val newProperties = table.properties.filter { case (k, _) => !propKeys.contains(k) } + val newTable = table.copy(properties = newProperties, comment = tableComment) + catalog.alterTable(newTable) + logInfo("table properties change finished") + } + + // to do support set default value to columns, and apply them to internalSchema + def applyPropertySet(sparkSession: SparkSession): Unit = { + val catalog = sparkSession.sessionState.catalog + val properties = changes.map(_.asInstanceOf[SetProperty]).map(f => f.property -> f.value).toMap + // This overrides old properties and update the comment parameter of CatalogTable + // with the newly added/modified comment since CatalogTable also holds comment as its + // direct property. 
+ val newTable = table.copy( + properties = table.properties ++ properties, + comment = properties.get(TableCatalog.PROP_COMMENT).orElse(table.comment)) + catalog.alterTable(newTable) + logInfo("table properties change finished") + } + + def getInternalSchemaAndHistorySchemaStr(sparkSession: SparkSession): (InternalSchema, String) = { + val path = AlterTableCommand312.getTableLocation(table, sparkSession) + val hadoopConf = sparkSession.sessionState.newHadoopConf() + val metaClient = HoodieTableMetaClient.builder().setBasePath(path) + .setConf(hadoopConf).build() + val schemaUtil = new TableSchemaResolver(metaClient) + + val schema = schemaUtil.getTableInternalSchemaFromCommitMetadata().orElse { + AvroInternalSchemaConverter.convert(schemaUtil.getTableAvroSchema) + } + + val historySchemaStr = schemaUtil.getTableHistorySchemaStrFromCommitMetadata.orElse("") + (schema, historySchemaStr) + } +} + +object AlterTableCommand312 extends Logging { + + /** + * Generate an commit with new schema to change the table's schema. + * @param internalSchema new schema after change + * @param historySchemaStr history schemas + * @param table The hoodie table. + * @param sparkSession The spark session. 
+ */ + def commitWithSchema(internalSchema: InternalSchema, historySchemaStr: String, table: CatalogTable, sparkSession: SparkSession): Unit = { + val schema = AvroInternalSchemaConverter.convert(internalSchema, table.identifier.table) + val path = getTableLocation(table, sparkSession) + + val jsc = new JavaSparkContext(sparkSession.sparkContext) + val client = DataSourceUtils.createHoodieClient(jsc, schema.toString, + path, table.identifier.table, parametersWithWriteDefaults(table.storage.properties).asJava) + + val hadoopConf = sparkSession.sessionState.newHadoopConf() + val metaClient = HoodieTableMetaClient.builder().setBasePath(path).setConf(hadoopConf).build() + + val commitActionType = CommitUtils.getCommitActionType(WriteOperationType.ALTER_SCHEMA, metaClient.getTableType) + val instantTime = HoodieActiveTimeline.createNewInstantTime + client.startCommitWithTime(instantTime, commitActionType) + + val hoodieTable = HoodieSparkTable.create(client.getConfig, client.getEngineContext) + val timeLine = hoodieTable.getActiveTimeline + val requested = new HoodieInstant(State.REQUESTED, commitActionType, instantTime) + val metadata = new HoodieCommitMetadata + metadata.setOperationType(WriteOperationType.ALTER_SCHEMA) + timeLine.transitionRequestedToInflight(requested, Option.of(metadata.toJsonString.getBytes(StandardCharsets.UTF_8))) + val extraMeta = new util.HashMap[String, String]() + extraMeta.put(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(internalSchema.setSchemaId(instantTime.toLong))) + val schemaManager = new FileBasedInternalSchemaStorageManager(metaClient) + schemaManager.persistHistorySchemaStr(instantTime, SerDeHelper.inheritSchemas(internalSchema, historySchemaStr)) + client.commit(instantTime, jsc.emptyRDD, Option.of(extraMeta)) + val existRoTable = sparkSession.catalog.tableExists(table.identifier.unquotedString + "_ro") + val existRtTable = sparkSession.catalog.tableExists(table.identifier.unquotedString + "_rt") + try { + 
sparkSession.catalog.refreshTable(table.identifier.unquotedString) + // try to refresh ro/rt table + if (existRoTable) sparkSession.catalog.refreshTable(table.identifier.unquotedString + "_ro") + if (existRoTable) sparkSession.catalog.refreshTable(table.identifier.unquotedString + "_rt") + } catch { + case NonFatal(e) => + log.error(s"Exception when attempting to refresh table ${table.identifier.quotedString}", e) + } + // try to sync to hive + // drop partition field before call alter table + val fullSparkSchema = SparkInternalSchemaConverter.constructSparkSchemaFromInternalSchema(internalSchema) + val dataSparkSchema = new StructType(fullSparkSchema.fields.filter(p => !table.partitionColumnNames.exists(f => sparkSession.sessionState.conf.resolver(f, p.name)))) + alterTableDataSchema(sparkSession, table.identifier.database.getOrElse("default"), table.identifier.table, dataSparkSchema) + if (existRoTable) alterTableDataSchema(sparkSession, table.identifier.database.getOrElse("default"), table.identifier.table + "_ro", dataSparkSchema) + if (existRtTable) alterTableDataSchema(sparkSession, table.identifier.database.getOrElse("default"), table.identifier.table + "_rt", dataSparkSchema) + } + + def alterTableDataSchema(sparkSession: SparkSession, db: String, tableName: String, dataSparkSchema: StructType): Unit = { + sparkSession.sessionState.catalog + .externalCatalog + .alterTableDataSchema(db, tableName, dataSparkSchema) + } + + def getTableLocation(table: CatalogTable, sparkSession: SparkSession): String = { + val uri = if (table.tableType == CatalogTableType.MANAGED) { + Some(sparkSession.sessionState.catalog.defaultTablePath(table.identifier)) + } else { + table.storage.locationUri + } + val conf = sparkSession.sessionState.newHadoopConf() + uri.map(makePathQualified(_, conf)) + .map(removePlaceHolder) + .getOrElse(throw new IllegalArgumentException(s"Missing location for ${table.identifier}")) + } + + private def removePlaceHolder(path: String): String = { + if 
(path == null || path.length == 0) { + path + } else if (path.endsWith("-__PLACEHOLDER__")) { + path.substring(0, path.length() - 16) + } else { + path + } + } + + def makePathQualified(path: URI, hadoopConf: Configuration): String = { + val hadoopPath = new Path(path) + val fs = hadoopPath.getFileSystem(hadoopConf) + fs.makeQualified(hadoopPath).toUri.toString + } + + def getParentName(names: Array[String]): String = { + if (names.size > 1) { + names.dropRight(1).mkString(".") + } else "" + } + + def parametersWithWriteDefaults(parameters: Map[String, String]): Map[String, String] = { + Map(OPERATION.key -> OPERATION.defaultValue, + TABLE_TYPE.key -> TABLE_TYPE.defaultValue, + PRECOMBINE_FIELD.key -> PRECOMBINE_FIELD.defaultValue, + HoodieWriteConfig.WRITE_PAYLOAD_CLASS_NAME.key -> HoodieWriteConfig.DEFAULT_WRITE_PAYLOAD_CLASS, + INSERT_DROP_DUPS.key -> INSERT_DROP_DUPS.defaultValue, + ASYNC_COMPACT_ENABLE.key -> ASYNC_COMPACT_ENABLE.defaultValue, + INLINE_CLUSTERING_ENABLE.key -> INLINE_CLUSTERING_ENABLE.defaultValue, + ASYNC_CLUSTERING_ENABLE.key -> ASYNC_CLUSTERING_ENABLE.defaultValue + ) ++ DataSourceOptionsHelper.translateConfigurations(parameters) + } + + def checkSchemaChange(colNames: Seq[String], catalogTable: CatalogTable): Unit = { + val primaryKeys = catalogTable.storage.properties.getOrElse("primaryKey", catalogTable.properties.getOrElse("primaryKey", "keyid")).split(",").map(_.trim) + val preCombineKey = Seq(catalogTable.storage.properties.getOrElse("preCombineField", catalogTable.properties.getOrElse("preCombineField", "ts"))).map(_.trim) + val partitionKey = catalogTable.partitionColumnNames.map(_.trim) + val checkNames = primaryKeys ++ preCombineKey ++ partitionKey + colNames.foreach { col => + if (checkNames.contains(col)) { + throw new UnsupportedOperationException("cannot support apply changes for primaryKey/CombineKey/partitionKey") + } + } + } +} + diff --git 
a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark312ExtendedSqlParser.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark312ExtendedSqlParser.scala new file mode 100644 index 0000000000000..64fbda9a5f187 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark312ExtendedSqlParser.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.parser + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.execution.{SparkSqlAstBuilder, SparkSqlParser} + +// TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.1x +class HoodieSpark312ExtendedSqlParser(session: SparkSession, delegate: ParserInterface) extends SparkSqlParser with Logging { + override val astBuilder: SparkSqlAstBuilder = new HoodieSpark312SqlAstBuilder(session) +} + diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark312SqlAstBuilder.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark312SqlAstBuilder.scala new file mode 100644 index 0000000000000..d92cceb9415ab --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark312SqlAstBuilder.scala @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.parser + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.parser.ParserUtils.withOrigin +import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.execution.SparkSqlAstBuilder + +// TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.1x +class HoodieSpark312SqlAstBuilder(sparkSession: SparkSession) extends SparkSqlAstBuilder { + + /** + * Parse a [[AlterTableAlterColumnStatement]] command to alter a column's property. + * + * For example: + * {{{ + * ALTER TABLE table1 ALTER COLUMN a.b.c TYPE bigint + * ALTER TABLE table1 ALTER COLUMN a.b.c SET NOT NULL + * ALTER TABLE table1 ALTER COLUMN a.b.c DROP NOT NULL + * ALTER TABLE table1 ALTER COLUMN a.b.c COMMENT 'new comment' + * ALTER TABLE table1 ALTER COLUMN a.b.c FIRST + * ALTER TABLE table1 ALTER COLUMN a.b.c AFTER x + * }}} + */ + override def visitAlterTableAlterColumn(ctx: AlterTableAlterColumnContext): LogicalPlan = withOrigin(ctx) { + val alter = super.visitAlterTableAlterColumn(ctx).asInstanceOf[AlterTableAlterColumnStatement] + HoodieAlterTableAlterColumnStatement(alter.tableName, alter.column, alter.dataType, alter.nullable, alter.comment, alter.position) + } + + /** + * Parse a [[org.apache.spark.sql.catalyst.plans.logical.AlterTableAddColumnsStatement]] command. + * + * For example: + * {{{ + * ALTER TABLE table1 + * ADD COLUMNS (col_name data_type [COMMENT col_comment], ...); + * }}} + */ + override def visitAddTableColumns(ctx: AddTableColumnsContext): LogicalPlan = withOrigin(ctx) { + val add = super.visitAddTableColumns(ctx).asInstanceOf[AlterTableAddColumnsStatement] + HoodieAlterTableAddColumnsStatement(add.tableName, add.columnsToAdd) + } + + /** + * Parse a [[org.apache.spark.sql.catalyst.plans.logical.AlterTableRenameColumnStatement]] command. 
+ * + * For example: + * {{{ + * ALTER TABLE table1 RENAME COLUMN a.b.c TO x + * }}} + */ + override def visitRenameTableColumn( + ctx: RenameTableColumnContext): LogicalPlan = withOrigin(ctx) { + val rename = super.visitRenameTableColumn(ctx).asInstanceOf[AlterTableRenameColumnStatement] + HoodieAlterTableRenameColumnStatement(rename.tableName, rename.column, rename.newName) + } + + /** + * Parse a [[AlterTableDropColumnsStatement]] command. + * + * For example: + * {{{ + * ALTER TABLE table1 DROP COLUMN a.b.c + * ALTER TABLE table1 DROP COLUMNS a.b.c, x, y + * }}} + */ + override def visitDropTableColumns( + ctx: DropTableColumnsContext): LogicalPlan = withOrigin(ctx) { + val drop = super.visitDropTableColumns(ctx).asInstanceOf[AlterTableDropColumnsStatement] + HoodieAlterTableDropColumnsStatement(drop.tableName, drop.columnsToDrop) + } + + /** + * Parse [[AlterViewSetPropertiesStatement]] or [[AlterTableSetPropertiesStatement]] commands. + * + * For example: + * {{{ + * ALTER TABLE table SET TBLPROPERTIES ('table_property' = 'property_value'); + * ALTER VIEW view SET TBLPROPERTIES ('table_property' = 'property_value'); + * }}} + */ + override def visitSetTableProperties( + ctx: SetTablePropertiesContext): LogicalPlan = withOrigin(ctx) { + val set = super.visitSetTableProperties(ctx) + set match { + case s: AlterTableSetPropertiesStatement => HoodieAlterTableSetPropertiesStatement(s.tableName, s.properties) + case other => other + } + } + + /** + * Parse [[AlterViewUnsetPropertiesStatement]] or [[AlterTableUnsetPropertiesStatement]] commands. 
+ * + * For example: + * {{{ + * ALTER TABLE table UNSET TBLPROPERTIES [IF EXISTS] ('comment', 'key'); + * ALTER VIEW view UNSET TBLPROPERTIES [IF EXISTS] ('comment', 'key'); + * }}} + */ + override def visitUnsetTableProperties( + ctx: UnsetTablePropertiesContext): LogicalPlan = withOrigin(ctx) { + val unset = super.visitUnsetTableProperties(ctx) + unset match { + case us: AlterTableUnsetPropertiesStatement => HoodieAlterTableUnsetPropertiesStatement(us.tableName, us.propertyKeys, us.ifExists) + case other => other + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3/pom.xml b/hudi-spark-datasource/hudi-spark3/pom.xml index d7c8799d4d282..a09a604db579e 100644 --- a/hudi-spark-datasource/hudi-spark3/pom.xml +++ b/hudi-spark-datasource/hudi-spark3/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 hudi-spark3_2.12 - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT hudi-spark3_2.12 jar @@ -80,6 +80,12 @@ net.alchim31.maven scala-maven-plugin + + + -nobootcp + -target:jvm-1.8 + + scala-compile-first @@ -175,11 +181,33 @@ org.apache.spark spark-sql_2.12 - ${spark3.version} + ${spark32.version} + provided + true + + + + org.apache.spark + spark-catalyst_2.12 + ${spark32.version} provided true + + org.apache.spark + spark-core_2.12 + ${spark32.version} + provided + true + + + * + * + + + + com.fasterxml.jackson.core jackson-databind @@ -200,18 +228,49 @@ org.apache.hudi hudi-spark-client ${project.version} + + + org.apache.spark + * + + + org.apache.hudi hudi-spark-common_${scala.binary.version} ${project.version} + + + org.apache.spark + * + + + + + org.json4s + json4s-jackson_${scala.binary.version} + 3.7.0-M11 + + + com.fasterxml.jackson.core + * + + + + org.apache.hudi hudi-spark3-common ${project.version} + + + org.apache.spark + * + + - org.apache.hudi @@ -221,6 +280,7 @@ test-jar test + org.apache.hudi hudi-spark-client @@ -228,7 +288,14 @@ tests test-jar test + + + org.apache.spark + * + + + org.apache.hudi 
hudi-common @@ -237,6 +304,7 @@ test-jar test + org.apache.hudi hudi-spark-common_${scala.binary.version} @@ -244,6 +312,12 @@ tests test-jar test + + + org.apache.spark + * + + diff --git a/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieVectorizedParquetRecordReader.java b/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieVectorizedParquetRecordReader.java new file mode 100644 index 0000000000000..abff023104c93 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieVectorizedParquetRecordReader.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.parquet; + +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hudi.client.utils.SparkInternalSchemaConverter; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.WritableColumnVector; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.vectorized.ColumnarBatch; + +import java.io.IOException; +import java.time.ZoneId; +import java.util.HashMap; +import java.util.Map; + +public class Spark32HoodieVectorizedParquetRecordReader extends VectorizedParquetRecordReader { + + // save the col type change info. + private Map> typeChangeInfos; + + private ColumnarBatch columnarBatch; + + private Map idToColumnVectors; + + private WritableColumnVector[] columnVectors; + + // The capacity of vectorized batch. + private int capacity; + + // If true, this class returns batches instead of rows. + private boolean returnColumnarBatch; + + // The memory mode of the columnarBatch. + private final MemoryMode memoryMode; + + /** + * Batch of rows that we assemble and the current index we've returned. Every time this + * batch is used up (batchIdx == numBatched), we populated the batch. 
+ */ + private int batchIdx = 0; + private int numBatched = 0; + + public Spark32HoodieVectorizedParquetRecordReader( + ZoneId convertTz, + String datetimeRebaseMode, + String datetimeRebaseTz, + String int96RebaseMode, + String int96RebaseTz, + boolean useOffHeap, + int capacity, + Map> typeChangeInfos) { + super(convertTz, datetimeRebaseMode, datetimeRebaseTz, int96RebaseMode, int96RebaseTz, useOffHeap, capacity); + memoryMode = useOffHeap ? MemoryMode.OFF_HEAP : MemoryMode.ON_HEAP; + this.typeChangeInfos = typeChangeInfos; + this.capacity = capacity; + } + + @Override + public void initBatch(StructType partitionColumns, InternalRow partitionValues) { + super.initBatch(partitionColumns, partitionValues); + if (columnVectors == null) { + columnVectors = new WritableColumnVector[sparkSchema.length() + partitionColumns.length()]; + } + if (idToColumnVectors == null) { + idToColumnVectors = new HashMap<>(); + typeChangeInfos.entrySet() + .stream() + .forEach(f -> { + WritableColumnVector vector = + memoryMode == MemoryMode.OFF_HEAP ? 
new OffHeapColumnVector(capacity, f.getValue().getLeft()) : new OnHeapColumnVector(capacity, f.getValue().getLeft()); + idToColumnVectors.put(f.getKey(), vector); + }); + } + } + + @Override + public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException, UnsupportedOperationException { + super.initialize(inputSplit, taskAttemptContext); + } + + @Override + public void close() throws IOException { + super.close(); + for (Map.Entry e : idToColumnVectors.entrySet()) { + e.getValue().close(); + } + idToColumnVectors = null; + columnarBatch = null; + columnVectors = null; + } + + @Override + public ColumnarBatch resultBatch() { + ColumnarBatch currentColumnBatch = super.resultBatch(); + boolean changed = false; + for (Map.Entry> entry : typeChangeInfos.entrySet()) { + boolean rewrite = SparkInternalSchemaConverter + .convertColumnVectorType((WritableColumnVector) currentColumnBatch.column(entry.getKey()), + idToColumnVectors.get(entry.getKey()), currentColumnBatch.numRows()); + if (rewrite) { + changed = true; + columnVectors[entry.getKey()] = idToColumnVectors.get(entry.getKey()); + } + } + if (changed) { + if (columnarBatch == null) { + // fill other vector + for (int i = 0; i < columnVectors.length; i++) { + if (columnVectors[i] == null) { + columnVectors[i] = (WritableColumnVector) currentColumnBatch.column(i); + } + } + columnarBatch = new ColumnarBatch(columnVectors); + } + columnarBatch.setNumRows(currentColumnBatch.numRows()); + return columnarBatch; + } else { + return currentColumnBatch; + } + } + + @Override + public boolean nextBatch() throws IOException { + boolean result = super.nextBatch(); + if (idToColumnVectors != null) { + idToColumnVectors.entrySet().stream().forEach(e -> e.getValue().reset()); + } + numBatched = resultBatch().numRows(); + batchIdx = 0; + return result; + } + + @Override + public void enableReturningBatches() { + returnColumnarBatch = true; + 
super.enableReturningBatches(); + } + + @Override + public Object getCurrentValue() { + if (typeChangeInfos == null || typeChangeInfos.isEmpty()) { + return super.getCurrentValue(); + } + + if (returnColumnarBatch) { + return columnarBatch == null ? super.getCurrentValue() : columnarBatch; + } + + return columnarBatch == null ? super.getCurrentValue() : columnarBatch.getRow(batchIdx - 1); + } + + @Override + public boolean nextKeyValue() throws IOException { + resultBatch(); + + if (returnColumnarBatch) { + return nextBatch(); + } + + if (batchIdx >= numBatched) { + if (!nextBatch()) { + return false; + } + } + ++batchIdx; + return true; + } +} + diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/HoodieSpark3_2CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/HoodieSpark3_2CatalystExpressionUtils.scala new file mode 100644 index 0000000000000..fc8c957e75bd9 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/HoodieSpark3_2CatalystExpressionUtils.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.expressions.{Add, AttributeReference, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} + +object HoodieSpark3_2CatalystExpressionUtils extends HoodieCatalystExpressionUtils { + + override def tryMatchAttributeOrderingPreservingTransformation(expr: Expression): Option[AttributeReference] = { + expr match { + case OrderPreservingTransformation(attrRef) => Some(attrRef) + case _ => None + } + } + + private object OrderPreservingTransformation { + def unapply(expr: Expression): Option[AttributeReference] = { + expr match { + // Date/Time Expressions + case DateFormatClass(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case DateAdd(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateSub(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateDiff(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateDiff(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + case FromUnixTime(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case FromUTCTimestamp(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case ParseToDate(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case ParseToTimestamp(OrderPreservingTransformation(attrRef), _, _, _) => Some(attrRef) + case ToUnixTimestamp(OrderPreservingTransformation(attrRef), _, _, _) => Some(attrRef) + case ToUTCTimestamp(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + + // String Expressions + case Lower(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Upper(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case 
org.apache.spark.sql.catalyst.expressions.Left(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + + // Math Expressions + // Binary + case Add(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case Add(_, OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case Multiply(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case Multiply(_, OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case Divide(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case BitwiseOr(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case BitwiseOr(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + // Unary + case Exp(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Expm1(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log10(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log1p(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log2(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case ShiftLeft(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case ShiftRight(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + + // Other + case cast @ Cast(OrderPreservingTransformation(attrRef), _, _, _) + if isCastPreservingOrdering(cast.child.dataType, cast.dataType) => Some(attrRef) + + // Identity transformation + case attrRef: AttributeReference => Some(attrRef) + // No match + case _ => None + } + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/adapter/Spark3_2Adapter.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/adapter/Spark3_2Adapter.scala index 699623f8b58b0..bad392b4f97ac 100644 --- a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/adapter/Spark3_2Adapter.scala +++ 
b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/adapter/Spark3_2Adapter.scala @@ -17,16 +17,31 @@ package org.apache.spark.sql.adapter -import org.apache.spark.sql.SparkSession +import org.apache.avro.Schema +import org.apache.spark.sql.avro._ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.SPARK_VERSION +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.parser.HoodieSpark3_2ExtendedSqlParser +import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.{HoodieCatalystExpressionUtils, HoodieSpark3_2CatalystExpressionUtils, SparkSession} /** - * The adapter for spark3.2. + * Implementation of [[SparkAdapter]] for Spark 3.2.x branch */ -class Spark3_2Adapter extends Spark3Adapter { +class Spark3_2Adapter extends BaseSpark3Adapter { + + override def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializer = + new HoodieSpark3_2AvroSerializer(rootCatalystType, rootAvroType, nullable) + + override def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializer = + new HoodieSpark3_2AvroDeserializer(rootAvroType, rootCatalystType) + + override def createCatalystExpressionUtils(): HoodieCatalystExpressionUtils = HoodieSpark3_2CatalystExpressionUtils + /** * if the logical plan is a TimeTravelRelation LogicalPlan. 
*/ @@ -51,4 +66,28 @@ class Spark3_2Adapter extends Spark3Adapter { (spark: SparkSession, delegate: ParserInterface) => new HoodieSpark3_2ExtendedSqlParser(spark, delegate) ) } + + override def createResolveHudiAlterTableCommand(sparkSession: SparkSession): Rule[LogicalPlan] = { + if (SPARK_VERSION.startsWith("3.2")) { + val loadClassName = "org.apache.spark.sql.hudi.ResolveHudiAlterTableCommandSpark32" + val clazz = Class.forName(loadClassName, true, Thread.currentThread().getContextClassLoader) + val ctor = clazz.getConstructors.head + ctor.newInstance(sparkSession).asInstanceOf[Rule[LogicalPlan]] + } else { + new Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan + } + } + } + + override def createHoodieParquetFileFormat(): Option[ParquetFileFormat] = { + if (SPARK_VERSION.startsWith("3.2")) { + val loadClassName = "org.apache.spark.sql.execution.datasources.parquet.Spark32HoodieParquetFileFormat" + val clazz = Class.forName(loadClassName, true, Thread.currentThread().getContextClassLoader) + val ctor = clazz.getConstructors.head + Some(ctor.newInstance().asInstanceOf[ParquetFileFormat]) + } else { + None + } + } } diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala new file mode 100644 index 0000000000000..ef9b5909207ca --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -0,0 +1,510 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import java.math.BigDecimal +import java.nio.ByteBuffer +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import org.apache.avro.{LogicalTypes, Schema, SchemaBuilder} +import org.apache.avro.Conversions.DecimalConversion +import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis} +import org.apache.avro.Schema.Type._ +import org.apache.avro.generic._ +import org.apache.avro.util.Utf8 +import org.apache.spark.sql.avro.AvroDeserializer.{RebaseSpec, createDateRebaseFuncInRead, createTimestampRebaseFuncInRead} +import org.apache.spark.sql.avro.AvroUtils.{toFieldDescription, toFieldStr} +import org.apache.spark.sql.catalyst.{InternalRow, NoopFilters, StructFilters} +import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils, GenericArrayData, RebaseDateTime} +import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_DAY +import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +import java.util.TimeZone + +/** + * A deserializer to deserialize data in avro format to data in catalyst format. 
+ * + * NOTE: This code is borrowed from Spark 3.2.1 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +private[sql] class AvroDeserializer(rootAvroType: Schema, + rootCatalystType: DataType, + positionalFieldMatch: Boolean, + datetimeRebaseSpec: RebaseSpec, + filters: StructFilters) { + + def this(rootAvroType: Schema, + rootCatalystType: DataType, + datetimeRebaseMode: String) = { + this( + rootAvroType, + rootCatalystType, + positionalFieldMatch = false, + RebaseSpec(LegacyBehaviorPolicy.withName(datetimeRebaseMode)), + new NoopFilters) + } + + private lazy val decimalConversions = new DecimalConversion() + + private val dateRebaseFunc = createDateRebaseFuncInRead(datetimeRebaseSpec.mode, "Avro") + + private val timestampRebaseFunc = createTimestampRebaseFuncInRead(datetimeRebaseSpec, "Avro") + + private val converter: Any => Option[Any] = try { + rootCatalystType match { + // A shortcut for empty schema. 
+ case st: StructType if st.isEmpty => + (_: Any) => Some(InternalRow.empty) + + case st: StructType => + val resultRow = new SpecificInternalRow(st.map(_.dataType)) + val fieldUpdater = new RowUpdater(resultRow) + val applyFilters = filters.skipRow(resultRow, _) + val writer = getRecordWriter(rootAvroType, st, Nil, Nil, applyFilters) + (data: Any) => { + val record = data.asInstanceOf[GenericRecord] + val skipRow = writer(fieldUpdater, record) + if (skipRow) None else Some(resultRow) + } + + case _ => + val tmpRow = new SpecificInternalRow(Seq(rootCatalystType)) + val fieldUpdater = new RowUpdater(tmpRow) + val writer = newWriter(rootAvroType, rootCatalystType, Nil, Nil) + (data: Any) => { + writer(fieldUpdater, 0, data) + Some(tmpRow.get(0, rootCatalystType)) + } + } + } catch { + case ise: IncompatibleSchemaException => throw new IncompatibleSchemaException( + s"Cannot convert Avro type $rootAvroType to SQL type ${rootCatalystType.sql}.", ise) + } + + def deserialize(data: Any): Option[Any] = converter(data) + + /** + * Creates a writer to write avro values to Catalyst values at the given ordinal with the given + * updater. + */ + private def newWriter(avroType: Schema, + catalystType: DataType, + avroPath: Seq[String], + catalystPath: Seq[String]): (CatalystDataUpdater, Int, Any) => Unit = { + val errorPrefix = s"Cannot convert Avro ${toFieldStr(avroPath)} to " + + s"SQL ${toFieldStr(catalystPath)} because " + val incompatibleMsg = errorPrefix + + s"schema is incompatible (avroType = $avroType, sqlType = ${catalystType.sql})" + + (avroType.getType, catalystType) match { + case (NULL, NullType) => (updater, ordinal, _) => + updater.setNullAt(ordinal) + + // TODO: we can avoid boxing if future version of avro provide primitive accessors. 
+ case (BOOLEAN, BooleanType) => (updater, ordinal, value) => + updater.setBoolean(ordinal, value.asInstanceOf[Boolean]) + + case (INT, IntegerType) => (updater, ordinal, value) => + updater.setInt(ordinal, value.asInstanceOf[Int]) + + case (INT, DateType) => (updater, ordinal, value) => + updater.setInt(ordinal, dateRebaseFunc(value.asInstanceOf[Int])) + + case (LONG, LongType) => (updater, ordinal, value) => + updater.setLong(ordinal, value.asInstanceOf[Long]) + + case (LONG, TimestampType) => avroType.getLogicalType match { + // For backward compatibility, if the Avro type is Long and it is not logical type + // (the `null` case), the value is processed as timestamp type with millisecond precision. + case null | _: TimestampMillis => (updater, ordinal, value) => + val millis = value.asInstanceOf[Long] + val micros = DateTimeUtils.millisToMicros(millis) + updater.setLong(ordinal, timestampRebaseFunc(micros)) + case _: TimestampMicros => (updater, ordinal, value) => + val micros = value.asInstanceOf[Long] + updater.setLong(ordinal, timestampRebaseFunc(micros)) + case other => throw new IncompatibleSchemaException(errorPrefix + + s"Avro logical type $other cannot be converted to SQL type ${TimestampType.sql}.") + } + + // Before we upgrade Avro to 1.8 for logical type support, spark-avro converts Long to Date. + // For backward compatibility, we still keep this conversion. 
+ case (LONG, DateType) => (updater, ordinal, value) => + updater.setInt(ordinal, (value.asInstanceOf[Long] / MILLIS_PER_DAY).toInt) + + case (FLOAT, FloatType) => (updater, ordinal, value) => + updater.setFloat(ordinal, value.asInstanceOf[Float]) + + case (DOUBLE, DoubleType) => (updater, ordinal, value) => + updater.setDouble(ordinal, value.asInstanceOf[Double]) + + case (STRING, StringType) => (updater, ordinal, value) => + val str = value match { + case s: String => UTF8String.fromString(s) + case s: Utf8 => + val bytes = new Array[Byte](s.getByteLength) + System.arraycopy(s.getBytes, 0, bytes, 0, s.getByteLength) + UTF8String.fromBytes(bytes) + } + updater.set(ordinal, str) + + case (ENUM, StringType) => (updater, ordinal, value) => + updater.set(ordinal, UTF8String.fromString(value.toString)) + + case (FIXED, BinaryType) => (updater, ordinal, value) => + updater.set(ordinal, value.asInstanceOf[GenericFixed].bytes().clone()) + + case (BYTES, BinaryType) => (updater, ordinal, value) => + val bytes = value match { + case b: ByteBuffer => + val bytes = new Array[Byte](b.remaining) + b.get(bytes) + bytes + case b: Array[Byte] => b + case other => + throw new RuntimeException(errorPrefix + s"$other is not a valid avro binary.") + } + updater.set(ordinal, bytes) + + case (FIXED, _: DecimalType) => (updater, ordinal, value) => + val d = avroType.getLogicalType.asInstanceOf[LogicalTypes.Decimal] + val bigDecimal = decimalConversions.fromFixed(value.asInstanceOf[GenericFixed], avroType, d) + val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) + updater.setDecimal(ordinal, decimal) + + case (BYTES, _: DecimalType) => (updater, ordinal, value) => + val d = avroType.getLogicalType.asInstanceOf[LogicalTypes.Decimal] + val bigDecimal = decimalConversions.fromBytes(value.asInstanceOf[ByteBuffer], avroType, d) + val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) + updater.setDecimal(ordinal, decimal) + + case (RECORD, st: StructType) => + // 
Avro datasource doesn't accept filters with nested attributes. See SPARK-32328. + // We can always return `false` from `applyFilters` for nested records. + val writeRecord = + getRecordWriter(avroType, st, avroPath, catalystPath, applyFilters = _ => false) + (updater, ordinal, value) => + val row = new SpecificInternalRow(st) + writeRecord(new RowUpdater(row), value.asInstanceOf[GenericRecord]) + updater.set(ordinal, row) + + case (ARRAY, ArrayType(elementType, containsNull)) => + val avroElementPath = avroPath :+ "element" + val elementWriter = newWriter(avroType.getElementType, elementType, + avroElementPath, catalystPath :+ "element") + (updater, ordinal, value) => + val collection = value.asInstanceOf[java.util.Collection[Any]] + val result = createArrayData(elementType, collection.size()) + val elementUpdater = new ArrayDataUpdater(result) + + var i = 0 + val iter = collection.iterator() + while (iter.hasNext) { + val element = iter.next() + if (element == null) { + if (!containsNull) { + throw new RuntimeException( + s"Array value at path ${toFieldStr(avroElementPath)} is not allowed to be null") + } else { + elementUpdater.setNullAt(i) + } + } else { + elementWriter(elementUpdater, i, element) + } + i += 1 + } + + updater.set(ordinal, result) + + case (MAP, MapType(keyType, valueType, valueContainsNull)) if keyType == StringType => + val keyWriter = newWriter(SchemaBuilder.builder().stringType(), StringType, + avroPath :+ "key", catalystPath :+ "key") + val valueWriter = newWriter(avroType.getValueType, valueType, + avroPath :+ "value", catalystPath :+ "value") + (updater, ordinal, value) => + val map = value.asInstanceOf[java.util.Map[AnyRef, AnyRef]] + val keyArray = createArrayData(keyType, map.size()) + val keyUpdater = new ArrayDataUpdater(keyArray) + val valueArray = createArrayData(valueType, map.size()) + val valueUpdater = new ArrayDataUpdater(valueArray) + val iter = map.entrySet().iterator() + var i = 0 + while (iter.hasNext) { + val entry = 
iter.next() + assert(entry.getKey != null) + keyWriter(keyUpdater, i, entry.getKey) + if (entry.getValue == null) { + if (!valueContainsNull) { + throw new RuntimeException( + s"Map value at path ${toFieldStr(avroPath :+ "value")} is not allowed to be null") + } else { + valueUpdater.setNullAt(i) + } + } else { + valueWriter(valueUpdater, i, entry.getValue) + } + i += 1 + } + + // The Avro map will never have null or duplicated map keys, it's safe to create a + // ArrayBasedMapData directly here. + updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray)) + + case (UNION, _) => + val allTypes = avroType.getTypes.asScala + val nonNullTypes = allTypes.filter(_.getType != NULL) + val nonNullAvroType = Schema.createUnion(nonNullTypes.asJava) + if (nonNullTypes.nonEmpty) { + if (nonNullTypes.length == 1) { + newWriter(nonNullTypes.head, catalystType, avroPath, catalystPath) + } else { + nonNullTypes.map(_.getType).toSeq match { + case Seq(a, b) if Set(a, b) == Set(INT, LONG) && catalystType == LongType => + (updater, ordinal, value) => + value match { + case null => updater.setNullAt(ordinal) + case l: java.lang.Long => updater.setLong(ordinal, l) + case i: java.lang.Integer => updater.setLong(ordinal, i.longValue()) + } + + case Seq(a, b) if Set(a, b) == Set(FLOAT, DOUBLE) && catalystType == DoubleType => + (updater, ordinal, value) => + value match { + case null => updater.setNullAt(ordinal) + case d: java.lang.Double => updater.setDouble(ordinal, d) + case f: java.lang.Float => updater.setDouble(ordinal, f.doubleValue()) + } + + case _ => + catalystType match { + case st: StructType if st.length == nonNullTypes.size => + val fieldWriters = nonNullTypes.zip(st.fields).map { + case (schema, field) => + newWriter(schema, field.dataType, avroPath, catalystPath :+ field.name) + }.toArray + (updater, ordinal, value) => { + val row = new SpecificInternalRow(st) + val fieldUpdater = new RowUpdater(row) + val i = GenericData.get().resolveUnion(nonNullAvroType, value) 
+ fieldWriters(i)(fieldUpdater, i, value) + updater.set(ordinal, row) + } + + case _ => throw new IncompatibleSchemaException(incompatibleMsg) + } + } + } + } else { + (updater, ordinal, _) => updater.setNullAt(ordinal) + } + + case _ => throw new IncompatibleSchemaException(incompatibleMsg) + } + } + + // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? + private def createDecimal(decimal: BigDecimal, precision: Int, scale: Int): Decimal = { + if (precision <= Decimal.MAX_LONG_DIGITS) { + // Constructs a `Decimal` with an unscaled `Long` value if possible. + Decimal(decimal.unscaledValue().longValue(), precision, scale) + } else { + // Otherwise, resorts to an unscaled `BigInteger` instead. + Decimal(decimal, precision, scale) + } + } + + private def getRecordWriter(avroType: Schema, + catalystType: StructType, + avroPath: Seq[String], + catalystPath: Seq[String], + applyFilters: Int => Boolean): (CatalystDataUpdater, GenericRecord) => Boolean = { + val validFieldIndexes = ArrayBuffer.empty[Int] + val fieldWriters = ArrayBuffer.empty[(CatalystDataUpdater, Any) => Unit] + + val avroSchemaHelper = + new AvroUtils.AvroSchemaHelper(avroType, avroPath, positionalFieldMatch) + val length = catalystType.length + var i = 0 + while (i < length) { + val catalystField = catalystType.fields(i) + avroSchemaHelper.getAvroField(catalystField.name, i) match { + case Some(avroField) => + validFieldIndexes += avroField.pos() + + val baseWriter = newWriter(avroField.schema(), catalystField.dataType, + avroPath :+ avroField.name, catalystPath :+ catalystField.name) + val ordinal = i + val fieldWriter = (fieldUpdater: CatalystDataUpdater, value: Any) => { + if (value == null) { + fieldUpdater.setNullAt(ordinal) + } else { + baseWriter(fieldUpdater, ordinal, value) + } + } + fieldWriters += fieldWriter + case None if !catalystField.nullable => + val fieldDescription = + toFieldDescription(catalystPath :+ catalystField.name, i, positionalFieldMatch) 
+ throw new IncompatibleSchemaException( + s"Cannot find non-nullable $fieldDescription in Avro schema.") + case _ => // nothing to do + } + i += 1 + } + + (fieldUpdater, record) => { + var i = 0 + var skipRow = false + while (i < validFieldIndexes.length && !skipRow) { + fieldWriters(i)(fieldUpdater, record.get(validFieldIndexes(i))) + skipRow = applyFilters(i) + i += 1 + } + skipRow + } + } + + private def createArrayData(elementType: DataType, length: Int): ArrayData = elementType match { + case BooleanType => UnsafeArrayData.fromPrimitiveArray(new Array[Boolean](length)) + case ByteType => UnsafeArrayData.fromPrimitiveArray(new Array[Byte](length)) + case ShortType => UnsafeArrayData.fromPrimitiveArray(new Array[Short](length)) + case IntegerType => UnsafeArrayData.fromPrimitiveArray(new Array[Int](length)) + case LongType => UnsafeArrayData.fromPrimitiveArray(new Array[Long](length)) + case FloatType => UnsafeArrayData.fromPrimitiveArray(new Array[Float](length)) + case DoubleType => UnsafeArrayData.fromPrimitiveArray(new Array[Double](length)) + case _ => new GenericArrayData(new Array[Any](length)) + } + + /** + * A base interface for updating values inside catalyst data structure like `InternalRow` and + * `ArrayData`. 
+ */ + sealed trait CatalystDataUpdater { + def set(ordinal: Int, value: Any): Unit + + def setNullAt(ordinal: Int): Unit = set(ordinal, null) + + def setBoolean(ordinal: Int, value: Boolean): Unit = set(ordinal, value) + + def setByte(ordinal: Int, value: Byte): Unit = set(ordinal, value) + + def setShort(ordinal: Int, value: Short): Unit = set(ordinal, value) + + def setInt(ordinal: Int, value: Int): Unit = set(ordinal, value) + + def setLong(ordinal: Int, value: Long): Unit = set(ordinal, value) + + def setDouble(ordinal: Int, value: Double): Unit = set(ordinal, value) + + def setFloat(ordinal: Int, value: Float): Unit = set(ordinal, value) + + def setDecimal(ordinal: Int, value: Decimal): Unit = set(ordinal, value) + } + + final class RowUpdater(row: InternalRow) extends CatalystDataUpdater { + override def set(ordinal: Int, value: Any): Unit = row.update(ordinal, value) + + override def setNullAt(ordinal: Int): Unit = row.setNullAt(ordinal) + + override def setBoolean(ordinal: Int, value: Boolean): Unit = row.setBoolean(ordinal, value) + + override def setByte(ordinal: Int, value: Byte): Unit = row.setByte(ordinal, value) + + override def setShort(ordinal: Int, value: Short): Unit = row.setShort(ordinal, value) + + override def setInt(ordinal: Int, value: Int): Unit = row.setInt(ordinal, value) + + override def setLong(ordinal: Int, value: Long): Unit = row.setLong(ordinal, value) + + override def setDouble(ordinal: Int, value: Double): Unit = row.setDouble(ordinal, value) + + override def setFloat(ordinal: Int, value: Float): Unit = row.setFloat(ordinal, value) + + override def setDecimal(ordinal: Int, value: Decimal): Unit = + row.setDecimal(ordinal, value, value.precision) + } + + final class ArrayDataUpdater(array: ArrayData) extends CatalystDataUpdater { + override def set(ordinal: Int, value: Any): Unit = array.update(ordinal, value) + + override def setNullAt(ordinal: Int): Unit = array.setNullAt(ordinal) + + override def setBoolean(ordinal: Int, value: 
Boolean): Unit = array.setBoolean(ordinal, value) + + override def setByte(ordinal: Int, value: Byte): Unit = array.setByte(ordinal, value) + + override def setShort(ordinal: Int, value: Short): Unit = array.setShort(ordinal, value) + + override def setInt(ordinal: Int, value: Int): Unit = array.setInt(ordinal, value) + + override def setLong(ordinal: Int, value: Long): Unit = array.setLong(ordinal, value) + + override def setDouble(ordinal: Int, value: Double): Unit = array.setDouble(ordinal, value) + + override def setFloat(ordinal: Int, value: Float): Unit = array.setFloat(ordinal, value) + + override def setDecimal(ordinal: Int, value: Decimal): Unit = array.update(ordinal, value) + } +} + +object AvroDeserializer { + + // NOTE: Following methods have been renamed in Spark 3.2.1 [1] making [[AvroDeserializer]] implementation + // (which relies on it) be only compatible with the exact same version of [[DataSourceUtils]]. + // To make sure this implementation is compatible w/ all Spark versions w/in Spark 3.2.x branch, + // we're preemptively cloned those methods to make sure Hudi is compatible w/ Spark 3.2.0 as well as + // w/ Spark >= 3.2.1 + // + // [1] https://github.com/apache/spark/pull/34978 + + // Specification of rebase operation including `mode` and the time zone in which it is performed + case class RebaseSpec(mode: LegacyBehaviorPolicy.Value, originTimeZone: Option[String] = None) { + // Use the default JVM time zone for backward compatibility + def timeZone: String = originTimeZone.getOrElse(TimeZone.getDefault.getID) + } + + def createDateRebaseFuncInRead(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Int => Int = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => days: Int => + if (days < RebaseDateTime.lastSwitchJulianDay) { + throw DataSourceUtils.newRebaseExceptionInRead(format) + } + days + case LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseJulianToGregorianDays + case LegacyBehaviorPolicy.CORRECTED => identity[Int] + 
} + + def createTimestampRebaseFuncInRead(rebaseSpec: RebaseSpec, + format: String): Long => Long = rebaseSpec.mode match { + case LegacyBehaviorPolicy.EXCEPTION => micros: Long => + if (micros < RebaseDateTime.lastSwitchJulianTs) { + throw DataSourceUtils.newRebaseExceptionInRead(format) + } + micros + case LegacyBehaviorPolicy.LEGACY => micros: Long => + RebaseDateTime.rebaseJulianToGregorianMicros(TimeZone.getTimeZone(rebaseSpec.timeZone), micros) + case LegacyBehaviorPolicy.CORRECTED => identity[Long] + } +} diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala new file mode 100644 index 0000000000000..73267f4147139 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala @@ -0,0 +1,426 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.avro + +import java.nio.ByteBuffer +import scala.collection.JavaConverters._ +import org.apache.avro.Conversions.DecimalConversion +import org.apache.avro.LogicalTypes +import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis} +import org.apache.avro.Schema +import org.apache.avro.Schema.Type +import org.apache.avro.Schema.Type._ +import org.apache.avro.generic.GenericData.{EnumSymbol, Fixed} +import org.apache.avro.generic.GenericData.Record +import org.apache.avro.util.Utf8 +import org.apache.spark.internal.Logging +import org.apache.spark.sql.avro.AvroSerializer.{createDateRebaseFuncInWrite, createTimestampRebaseFuncInWrite} +import org.apache.spark.sql.avro.AvroUtils.{toFieldDescription, toFieldStr} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, SpecificInternalRow} +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, RebaseDateTime} +import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy +import org.apache.spark.sql.types._ + +import java.util.TimeZone + +/** + * A serializer to serialize data in catalyst format to data in avro format. 
+ * + * NOTE: This code is borrowed from Spark 3.2.1 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +private[sql] class AvroSerializer(rootCatalystType: DataType, + rootAvroType: Schema, + nullable: Boolean, + positionalFieldMatch: Boolean, + datetimeRebaseMode: LegacyBehaviorPolicy.Value) extends Logging { + + def this(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) = { + this(rootCatalystType, rootAvroType, nullable, positionalFieldMatch = false, + LegacyBehaviorPolicy.withName(SQLConf.get.getConf(SQLConf.AVRO_REBASE_MODE_IN_WRITE))) + } + + def serialize(catalystData: Any): Any = { + converter.apply(catalystData) + } + + private val dateRebaseFunc = createDateRebaseFuncInWrite( + datetimeRebaseMode, "Avro") + + private val timestampRebaseFunc = createTimestampRebaseFuncInWrite( + datetimeRebaseMode, "Avro") + + private val converter: Any => Any = { + val actualAvroType = resolveNullableType(rootAvroType, nullable) + val baseConverter = try { + rootCatalystType match { + case st: StructType => + newStructConverter(st, actualAvroType, Nil, Nil).asInstanceOf[Any => Any] + case _ => + val tmpRow = new SpecificInternalRow(Seq(rootCatalystType)) + val converter = newConverter(rootCatalystType, actualAvroType, Nil, Nil) + (data: Any) => + tmpRow.update(0, data) + converter.apply(tmpRow, 0) + } + } catch { + case ise: IncompatibleSchemaException => throw new IncompatibleSchemaException( + s"Cannot convert SQL type ${rootCatalystType.sql} to Avro type $rootAvroType.", ise) + } + if (nullable) { + (data: Any) => + if (data == null) { + null + } else { + baseConverter.apply(data) + } + } else { + baseConverter + } + } + + private type Converter = (SpecializedGetters, Int) => Any + + private lazy val decimalConversions = new DecimalConversion() + + private def newConverter(catalystType: 
DataType, + avroType: Schema, + catalystPath: Seq[String], + avroPath: Seq[String]): Converter = { + val errorPrefix = s"Cannot convert SQL ${toFieldStr(catalystPath)} " + + s"to Avro ${toFieldStr(avroPath)} because " + (catalystType, avroType.getType) match { + case (NullType, NULL) => + (getter, ordinal) => null + case (BooleanType, BOOLEAN) => + (getter, ordinal) => getter.getBoolean(ordinal) + case (ByteType, INT) => + (getter, ordinal) => getter.getByte(ordinal).toInt + case (ShortType, INT) => + (getter, ordinal) => getter.getShort(ordinal).toInt + case (IntegerType, INT) => + (getter, ordinal) => getter.getInt(ordinal) + case (LongType, LONG) => + (getter, ordinal) => getter.getLong(ordinal) + case (FloatType, FLOAT) => + (getter, ordinal) => getter.getFloat(ordinal) + case (DoubleType, DOUBLE) => + (getter, ordinal) => getter.getDouble(ordinal) + case (d: DecimalType, FIXED) + if avroType.getLogicalType == LogicalTypes.decimal(d.precision, d.scale) => + (getter, ordinal) => + val decimal = getter.getDecimal(ordinal, d.precision, d.scale) + decimalConversions.toFixed(decimal.toJavaBigDecimal, avroType, + LogicalTypes.decimal(d.precision, d.scale)) + + case (d: DecimalType, BYTES) + if avroType.getLogicalType == LogicalTypes.decimal(d.precision, d.scale) => + (getter, ordinal) => + val decimal = getter.getDecimal(ordinal, d.precision, d.scale) + decimalConversions.toBytes(decimal.toJavaBigDecimal, avroType, + LogicalTypes.decimal(d.precision, d.scale)) + + case (StringType, ENUM) => + val enumSymbols: Set[String] = avroType.getEnumSymbols.asScala.toSet + (getter, ordinal) => + val data = getter.getUTF8String(ordinal).toString + if (!enumSymbols.contains(data)) { + throw new IncompatibleSchemaException(errorPrefix + + s""""$data" cannot be written since it's not defined in enum """ + + enumSymbols.mkString("\"", "\", \"", "\"")) + } + new EnumSymbol(avroType, data) + + case (StringType, STRING) => + (getter, ordinal) => new 
Utf8(getter.getUTF8String(ordinal).getBytes) + + case (BinaryType, FIXED) => + val size = avroType.getFixedSize + (getter, ordinal) => + val data: Array[Byte] = getter.getBinary(ordinal) + if (data.length != size) { + def len2str(len: Int): String = s"$len ${if (len > 1) "bytes" else "byte"}" + + throw new IncompatibleSchemaException(errorPrefix + len2str(data.length) + + " of binary data cannot be written into FIXED type with size of " + len2str(size)) + } + new Fixed(avroType, data) + + case (BinaryType, BYTES) => + (getter, ordinal) => ByteBuffer.wrap(getter.getBinary(ordinal)) + + case (DateType, INT) => + (getter, ordinal) => dateRebaseFunc(getter.getInt(ordinal)) + + case (TimestampType, LONG) => avroType.getLogicalType match { + // For backward compatibility, if the Avro type is Long and it is not logical type + // (the `null` case), output the timestamp value as with millisecond precision. + case null | _: TimestampMillis => (getter, ordinal) => + DateTimeUtils.microsToMillis(timestampRebaseFunc(getter.getLong(ordinal))) + case _: TimestampMicros => (getter, ordinal) => + timestampRebaseFunc(getter.getLong(ordinal)) + case other => throw new IncompatibleSchemaException(errorPrefix + + s"SQL type ${TimestampType.sql} cannot be converted to Avro logical type $other") + } + + case (ArrayType(et, containsNull), ARRAY) => + val elementConverter = newConverter( + et, resolveNullableType(avroType.getElementType, containsNull), + catalystPath :+ "element", avroPath :+ "element") + (getter, ordinal) => { + val arrayData = getter.getArray(ordinal) + val len = arrayData.numElements() + val result = new Array[Any](len) + var i = 0 + while (i < len) { + if (containsNull && arrayData.isNullAt(i)) { + result(i) = null + } else { + result(i) = elementConverter(arrayData, i) + } + i += 1 + } + // avro writer is expecting a Java Collection, so we convert it into + // `ArrayList` backed by the specified array without data copying. 
+ java.util.Arrays.asList(result: _*) + } + + case (st: StructType, RECORD) => + val structConverter = newStructConverter(st, avroType, catalystPath, avroPath) + val numFields = st.length + (getter, ordinal) => structConverter(getter.getStruct(ordinal, numFields)) + + case (st: StructType, UNION) => + val unionConverter = newUnionConverter(st, avroType, catalystPath, avroPath) + val numFields = st.length + (getter, ordinal) => unionConverter(getter.getStruct(ordinal, numFields)) + + case (MapType(kt, vt, valueContainsNull), MAP) if kt == StringType => + val valueConverter = newConverter( + vt, resolveNullableType(avroType.getValueType, valueContainsNull), + catalystPath :+ "value", avroPath :+ "value") + (getter, ordinal) => + val mapData = getter.getMap(ordinal) + val len = mapData.numElements() + val result = new java.util.HashMap[String, Any](len) + val keyArray = mapData.keyArray() + val valueArray = mapData.valueArray() + var i = 0 + while (i < len) { + val key = keyArray.getUTF8String(i).toString + if (valueContainsNull && valueArray.isNullAt(i)) { + result.put(key, null) + } else { + result.put(key, valueConverter(valueArray, i)) + } + i += 1 + } + result + + case _ => + throw new IncompatibleSchemaException(errorPrefix + + s"schema is incompatible (sqlType = ${catalystType.sql}, avroType = $avroType)") + } + } + + private def newStructConverter(catalystStruct: StructType, + avroStruct: Schema, + catalystPath: Seq[String], + avroPath: Seq[String]): InternalRow => Record = { + + val avroPathStr = toFieldStr(avroPath) + if (avroStruct.getType != RECORD) { + throw new IncompatibleSchemaException(s"$avroPathStr was not a RECORD") + } + val avroFields = avroStruct.getFields.asScala + if (avroFields.size != catalystStruct.length) { + throw new IncompatibleSchemaException( + s"Avro $avroPathStr schema length (${avroFields.size}) doesn't match " + + s"SQL ${toFieldStr(catalystPath)} schema length (${catalystStruct.length})") + } + val avroSchemaHelper = + new 
AvroUtils.AvroSchemaHelper(avroStruct, avroPath, positionalFieldMatch) + + val (avroIndices: Array[Int], fieldConverters: Array[Converter]) = + catalystStruct.zipWithIndex.map { case (catalystField, catalystPos) => + val avroField = avroSchemaHelper.getAvroField(catalystField.name, catalystPos) match { + case Some(f) => f + case None => + val fieldDescription = toFieldDescription( + catalystPath :+ catalystField.name, catalystPos, positionalFieldMatch) + throw new IncompatibleSchemaException( + s"Cannot find $fieldDescription in Avro schema at $avroPathStr") + } + val converter = newConverter(catalystField.dataType, + resolveNullableType(avroField.schema(), catalystField.nullable), + catalystPath :+ catalystField.name, avroPath :+ avroField.name) + (avroField.pos(), converter) + }.toArray.unzip + + val numFields = catalystStruct.length + row: InternalRow => + val result = new Record(avroStruct) + var i = 0 + while (i < numFields) { + if (row.isNullAt(i)) { + result.put(avroIndices(i), null) + } else { + result.put(avroIndices(i), fieldConverters(i).apply(row, i)) + } + i += 1 + } + result + } + + private def newUnionConverter(catalystStruct: StructType, + avroUnion: Schema, + catalystPath: Seq[String], + avroPath: Seq[String]): InternalRow => Any = { + if (avroUnion.getType != UNION || !canMapUnion(catalystStruct, avroUnion)) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " + + s"Avro type $avroUnion.") + } + val nullable = avroUnion.getTypes.size() > 0 && avroUnion.getTypes.get(0).getType == Type.NULL + val avroInnerTypes = if (nullable) { + avroUnion.getTypes.asScala.tail + } else { + avroUnion.getTypes.asScala + } + val fieldConverters = catalystStruct.zip(avroInnerTypes).map { + case (f1, f2) => newConverter(f1.dataType, f2, catalystPath, avroPath) + } + val numFields = catalystStruct.length + (row: InternalRow) => + var i = 0 + var result: Any = null + while (i < numFields) { + if (!row.isNullAt(i)) { + if (result 
!= null) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " + + s"Avro union $avroUnion. Record has more than one optional values set") + } + result = fieldConverters(i).apply(row, i) + } + i += 1 + } + if (!nullable && result == null) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " + + s"Avro union $avroUnion. Record has no values set, while should have exactly one") + } + result + } + + private def canMapUnion(catalystStruct: StructType, avroStruct: Schema): Boolean = { + (avroStruct.getTypes.size() > 0 && + avroStruct.getTypes.get(0).getType == Type.NULL && + avroStruct.getTypes.size() - 1 == catalystStruct.length) || avroStruct.getTypes.size() == catalystStruct.length + } + + /** + * Resolve a possibly nullable Avro Type. + * + * An Avro type is nullable when it is a [[UNION]] of two types: one null type and another + * non-null type. This method will check the nullability of the input Avro type and return the + * non-null type within when it is nullable. Otherwise it will return the input Avro type + * unchanged. It will throw an [[UnsupportedAvroTypeException]] when the input Avro type is an + * unsupported nullable type. + * + * It will also log a warning message if the nullability for Avro and catalyst types are + * different. + */ + private def resolveNullableType(avroType: Schema, nullable: Boolean): Schema = { + val (avroNullable, resolvedAvroType) = resolveAvroType(avroType) + warnNullabilityDifference(avroNullable, nullable) + resolvedAvroType + } + + /** + * Check the nullability of the input Avro type and resolve it when it is nullable. The first + * return value is a [[Boolean]] indicating if the input Avro type is nullable. The second + * return value is the possibly resolved type. 
+ */ + private def resolveAvroType(avroType: Schema): (Boolean, Schema) = { + if (avroType.getType == Type.UNION) { + val fields = avroType.getTypes.asScala + val actualType = fields.filter(_.getType != Type.NULL) + if (fields.length == 2 && actualType.length == 1) { + (true, actualType.head) + } else { + // This is just a normal union, not used to designate nullability + (false, avroType) + } + } else { + (false, avroType) + } + } + + /** + * log a warning message if the nullability for Avro and catalyst types are different. + */ + private def warnNullabilityDifference(avroNullable: Boolean, catalystNullable: Boolean): Unit = { + if (avroNullable && !catalystNullable) { + logWarning("Writing Avro files with nullable Avro schema and non-nullable catalyst schema.") + } + if (!avroNullable && catalystNullable) { + logWarning("Writing Avro files with non-nullable Avro schema and nullable catalyst " + + "schema will throw runtime exception if there is a record with null value.") + } + } +} + +object AvroSerializer { + + // NOTE: Following methods have been renamed in Spark 3.2.1 [1] making [[AvroSerializer]] implementation + // (which relies on it) be only compatible with the exact same version of [[DataSourceUtils]]. 
+ // To make sure this implementation is compatible w/ all Spark versions w/in Spark 3.2.x branch, + // we're preemptively cloned those methods to make sure Hudi is compatible w/ Spark 3.2.0 as well as + // w/ Spark >= 3.2.1 + // + // [1] https://github.com/apache/spark/pull/34978 + + def createDateRebaseFuncInWrite(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Int => Int = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => days: Int => + if (days < RebaseDateTime.lastSwitchGregorianDay) { + throw DataSourceUtils.newRebaseExceptionInWrite(format) + } + days + case LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseGregorianToJulianDays + case LegacyBehaviorPolicy.CORRECTED => identity[Int] + } + + def createTimestampRebaseFuncInWrite(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Long => Long = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => micros: Long => + if (micros < RebaseDateTime.lastSwitchGregorianTs) { + throw DataSourceUtils.newRebaseExceptionInWrite(format) + } + micros + case LegacyBehaviorPolicy.LEGACY => + val timeZone = SQLConf.get.sessionLocalTimeZone + RebaseDateTime.rebaseGregorianToJulianMicros(TimeZone.getTimeZone(timeZone), _) + case LegacyBehaviorPolicy.CORRECTED => identity[Long] + } + +} diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala new file mode 100644 index 0000000000000..f63133795ed91 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.sql.internal.SQLConf + +import java.util.Locale + +import scala.collection.JavaConverters._ + +/** + * NOTE: This code is borrowed from Spark 3.2.1 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +private[avro] object AvroUtils { + + /** + * Wraps an Avro Schema object so that field lookups are faster. + * + * @param avroSchema The schema in which to search for fields. Must be of type RECORD. + * @param avroPath The seq of parent field names leading to `avroSchema`. + * @param positionalFieldMatch If true, perform field matching in a positional fashion + * (structural comparison between schemas, ignoring names); + * otherwise, perform field matching using field names. 
+ */ + class AvroSchemaHelper(avroSchema: Schema, + avroPath: Seq[String], + positionalFieldMatch: Boolean) { + if (avroSchema.getType != Schema.Type.RECORD) { + throw new IncompatibleSchemaException( + s"Attempting to treat ${avroSchema.getName} as a RECORD, but it was: ${avroSchema.getType}") + } + + private[this] val avroFieldArray = avroSchema.getFields.asScala.toArray + private[this] val fieldMap = avroSchema.getFields.asScala + .groupBy(_.name.toLowerCase(Locale.ROOT)) + .mapValues(_.toSeq) // toSeq needed for scala 2.13 + + /** + * Extract a single field from the contained avro schema which has the desired field name, + * performing the matching with proper case sensitivity according to SQLConf.resolver. + * + * @param name The name of the field to search for. + * @return `Some(match)` if a matching Avro field is found, otherwise `None`. + */ + private[avro] def getFieldByName(name: String): Option[Schema.Field] = { + + // get candidates, ignoring case of field name + val candidates = fieldMap.getOrElse(name.toLowerCase(Locale.ROOT), Seq.empty) + + // search candidates, taking into account case sensitivity settings + candidates.filter(f => SQLConf.get.resolver(f.name(), name)) match { + case Seq(avroField) => Some(avroField) + case Seq() => None + case matches => throw new IncompatibleSchemaException(s"Searching for '$name' in Avro " + + s"schema at ${toFieldStr(avroPath)} gave ${matches.size} matches. Candidates: " + + matches.map(_.name()).mkString("[", ", ", "]") + ) + } + } + + /** Get the Avro field corresponding to the provided Catalyst field name/position, if any. */ + def getAvroField(fieldName: String, catalystPos: Int): Option[Schema.Field] = { + if (positionalFieldMatch) { + avroFieldArray.lift(catalystPos) + } else { + getFieldByName(fieldName) + } + } + } + + + /** + * Take a field's hierarchical names (see [[toFieldStr]]) and position, and convert it to a + * human-readable description of the field. 
Depending on the value of `positionalFieldMatch`, + * either the position or name will be emphasized (for true and false, respectively); both will + * be included in either case. + */ + private[avro] def toFieldDescription( + names: Seq[String], + position: Int, + positionalFieldMatch: Boolean): String = if (positionalFieldMatch) { + s"field at position $position (${toFieldStr(names)})" + } else { + s"${toFieldStr(names)} (at position $position)" + } + + /** + * Convert a sequence of hierarchical field names (like `Seq(foo, bar)`) into a human-readable + * string representing the field, like "field 'foo.bar'". If `names` is empty, the string + * "top-level record" is returned. + */ + private[avro] def toFieldStr(names: Seq[String]): String = names match { + case Seq() => "top-level record" + case n => s"field '${n.mkString(".")}'" + } + +} diff --git a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3AvroDeserializer.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroDeserializer.scala similarity index 60% rename from hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3AvroDeserializer.scala rename to hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroDeserializer.scala index bd9ead5a70b6d..0275e2f635d3b 100644 --- a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3AvroDeserializer.scala +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroDeserializer.scala @@ -21,18 +21,10 @@ import org.apache.avro.Schema import org.apache.hudi.HoodieSparkUtils import org.apache.spark.sql.types.DataType -class HoodieSpark3AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) +class HoodieSpark3_2AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) extends HoodieAvroDeserializer { - // 
SPARK-34404: As of Spark3.2, there is no AvroDeserializer's constructor with Schema and DataType arguments. - // So use the reflection to get AvroDeserializer instance. - private val avroDeserializer = if (HoodieSparkUtils.isSpark3_2) { - val constructor = classOf[AvroDeserializer].getConstructor(classOf[Schema], classOf[DataType], classOf[String]) - constructor.newInstance(rootAvroType, rootCatalystType, "EXCEPTION") - } else { - val constructor = classOf[AvroDeserializer].getConstructor(classOf[Schema], classOf[DataType]) - constructor.newInstance(rootAvroType, rootCatalystType) - } + private val avroDeserializer = new AvroDeserializer(rootAvroType, rootCatalystType, "EXCEPTION") def deserialize(data: Any): Option[Any] = avroDeserializer.deserialize(data) } diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroSerializer.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroSerializer.scala new file mode 100644 index 0000000000000..6e76ba68f95ff --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroSerializer.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.sql.types.DataType + +class HoodieSpark3_2AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) + extends HoodieAvroSerializer { + + val avroSerializer = new AvroSerializer(rootCatalystType, rootAvroType, nullable) + + override def serialize(catalystData: Any): Any = avroSerializer.serialize(catalystData) +} diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieParquetFileFormat.scala new file mode 100644 index 0000000000000..28db4739656e7 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieParquetFileFormat.scala @@ -0,0 +1,349 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.parquet + +import java.net.URI + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapred.FileSplit +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl +import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType} +import org.apache.hudi.client.utils.SparkInternalSchemaConverter +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.common.util.InternalSchemaCache +import org.apache.hudi.common.util.collection.Pair +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.action.InternalSchemaMerger +import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper} +import org.apache.parquet.filter2.compat.FilterCompat +import org.apache.parquet.filter2.predicate.FilterApi +import org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GROUPS +import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetRecordReader} +import org.apache.spark.TaskContext +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Cast, JoinedRow} +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.datasources.{DataSourceUtils, PartitionedFile, RecordReaderIterator} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources._ +import org.apache.spark.sql.types.{AtomicType, DataType, StructField, StructType} +import org.apache.spark.util.SerializableConfiguration + +class Spark32HoodieParquetFileFormat extends ParquetFileFormat { + + // reference ParquetFileFormat from spark project + override def buildReaderWithPartitionValues( + sparkSession: SparkSession, + dataSchema: StructType, + partitionSchema: StructType, + 
requiredSchema: StructType, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { + if (hadoopConf.get(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA, "").isEmpty) { + // fallback to origin parquet File read + super.buildReaderWithPartitionValues(sparkSession, dataSchema, partitionSchema, requiredSchema, filters, options, hadoopConf) + } else { + hadoopConf.set(ParquetInputFormat.READ_SUPPORT_CLASS, classOf[ParquetReadSupport].getName) + hadoopConf.set( + ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA, + requiredSchema.json) + hadoopConf.set( + ParquetWriteSupport.SPARK_ROW_SCHEMA, + requiredSchema.json) + hadoopConf.set( + SQLConf.SESSION_LOCAL_TIMEZONE.key, + sparkSession.sessionState.conf.sessionLocalTimeZone) + hadoopConf.setBoolean( + SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key, + sparkSession.sessionState.conf.nestedSchemaPruningEnabled) + hadoopConf.setBoolean( + SQLConf.CASE_SENSITIVE.key, + sparkSession.sessionState.conf.caseSensitiveAnalysis) + + ParquetWriteSupport.setSchema(requiredSchema, hadoopConf) + + // Sets flags for `ParquetToSparkSchemaConverter` + hadoopConf.setBoolean( + SQLConf.PARQUET_BINARY_AS_STRING.key, + sparkSession.sessionState.conf.isParquetBinaryAsString) + hadoopConf.setBoolean( + SQLConf.PARQUET_INT96_AS_TIMESTAMP.key, + sparkSession.sessionState.conf.isParquetINT96AsTimestamp) + // for dataSource v1, we have no method to do project for spark physical plan. + // it's safe to do cols project here. 
+ val internalSchemaString = hadoopConf.get(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA) + val querySchemaOption = SerDeHelper.fromJson(internalSchemaString) + if (querySchemaOption.isPresent && !requiredSchema.isEmpty) { + val prunedSchema = SparkInternalSchemaConverter.convertAndPruneStructTypeToInternalSchema(requiredSchema, querySchemaOption.get()) + hadoopConf.set(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA, SerDeHelper.toJson(prunedSchema)) + } + val broadcastedHadoopConf = + sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) + + // TODO: if you move this into the closure it reverts to the default values. + // If true, enable using the custom RecordReader for parquet. This only works for + // a subset of the types (no complex types). + val resultSchema = StructType(partitionSchema.fields ++ requiredSchema.fields) + val sqlConf = sparkSession.sessionState.conf + val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled + val enableVectorizedReader: Boolean = + sqlConf.parquetVectorizedReaderEnabled && + resultSchema.forall(_.dataType.isInstanceOf[AtomicType]) + val enableRecordFilter: Boolean = sqlConf.parquetRecordFilterEnabled + val timestampConversion: Boolean = sqlConf.isParquetINT96TimestampConversion + val capacity = sqlConf.parquetVectorizedReaderBatchSize + val enableParquetFilterPushDown: Boolean = sqlConf.parquetFilterPushDown + // Whole stage codegen (PhysicalRDD) is able to deal with batches directly + val returningBatch = supportBatch(sparkSession, resultSchema) + val pushDownDate = sqlConf.parquetFilterPushDownDate + val pushDownTimestamp = sqlConf.parquetFilterPushDownTimestamp + val pushDownDecimal = sqlConf.parquetFilterPushDownDecimal + val pushDownStringStartWith = sqlConf.parquetFilterPushDownStringStartWith + val pushDownInFilterThreshold = sqlConf.parquetFilterPushDownInFilterThreshold + val isCaseSensitive = sqlConf.caseSensitiveAnalysis + val parquetOptions = new ParquetOptions(options, 
sparkSession.sessionState.conf) + val datetimeRebaseModeInRead = parquetOptions.datetimeRebaseModeInRead + val int96RebaseModeInread = parquetOptions.int96RebaseModeInRead + + (file: PartitionedFile) => { + assert(file.partitionValues.numFields == partitionSchema.size) + val filePath = new Path(new URI(file.filePath)) + val split = new FileSplit(filePath, file.start, file.length, Array.empty[String]) + val sharedConf = broadcastedHadoopConf.value.value + // deal with the internal schema + val internalSchemaString = sharedConf.get(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA) + // querySchema must be a pruned schema. + val querySchemaOption = SerDeHelper.fromJson(internalSchemaString) + val internalSchemaChangeEnabled = if (internalSchemaString.isEmpty || !querySchemaOption.isPresent) false else true + val tablePath = sharedConf.get(SparkInternalSchemaConverter.HOODIE_TABLE_PATH) + val commitInstantTime = FSUtils.getCommitTime(filePath.getName).toLong; + val fileSchema = if (internalSchemaChangeEnabled) { + val validCommits = sharedConf.get(SparkInternalSchemaConverter.HOODIE_VALID_COMMITS_LIST) + InternalSchemaCache.getInternalSchemaByVersionId(commitInstantTime, tablePath, sharedConf, if (validCommits == null) "" else validCommits) + } else { + // this should not happen; searchSchemaAndCache will deal with it correctly. + null + } + + lazy val footerFileMetaData = + ParquetFooterReader.readFooter(sharedConf, filePath, SKIP_ROW_GROUPS).getFileMetaData + val datetimeRebaseSpec = DataSourceUtils.datetimeRebaseSpec( + footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + // Try to push down filters when filter push-down is enabled.
+ val pushed = if (enableParquetFilterPushDown) { + val parquetSchema = footerFileMetaData.getSchema + val parquetFilters = new ParquetFilters( + parquetSchema, + pushDownDate, + pushDownTimestamp, + pushDownDecimal, + pushDownStringStartWith, + pushDownInFilterThreshold, + isCaseSensitive, + datetimeRebaseSpec) + filters.map(Spark32HoodieParquetFileFormat.rebuildFilterFromParquet(_, fileSchema, querySchemaOption.get())) + // Collects all converted Parquet filter predicates. Notice that not all predicates can be + // converted (`ParquetFilters.createFilter` returns an `Option`). That's why a `flatMap` + // is used here. + .flatMap(parquetFilters.createFilter(_)) + .reduceOption(FilterApi.and) + } else { + None + } + + // PARQUET_INT96_TIMESTAMP_CONVERSION says to apply timezone conversions to int96 timestamps' + // *only* if the file was created by something other than "parquet-mr", so check the actual + // writer here for this file. We have to do this per-file, as each file in the table may + // have different writers. + // Define isCreatedByParquetMr as function to avoid unnecessary parquet footer reads. 
+ def isCreatedByParquetMr: Boolean = + footerFileMetaData.getCreatedBy().startsWith("parquet-mr") + + val convertTz = + if (timestampConversion && !isCreatedByParquetMr) { + Some(DateTimeUtils.getZoneId(sharedConf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key))) + } else { + None + } + val int96RebaseSpec = DataSourceUtils.int96RebaseSpec( + footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInread) + + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) + // use new conf + val hadoopAttempConf = new Configuration(broadcastedHadoopConf.value.value) + // + // reset request schema + var typeChangeInfos: java.util.Map[Integer, Pair[DataType, DataType]] = new java.util.HashMap() + if (internalSchemaChangeEnabled) { + val mergedInternalSchema = new InternalSchemaMerger(fileSchema, querySchemaOption.get(), true, true).mergeSchema() + val mergedSchema = SparkInternalSchemaConverter.constructSparkSchemaFromInternalSchema(mergedInternalSchema) + typeChangeInfos = SparkInternalSchemaConverter.collectTypeChangedCols(querySchemaOption.get(), mergedInternalSchema) + hadoopAttempConf.set(ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA, mergedSchema.json) + } + val hadoopAttemptContext = + new TaskAttemptContextImpl(hadoopAttempConf, attemptId) + + // Try to push down filters when filter push-down is enabled. + // Notice: This push-down is RowGroups level, not individual records. 
+ if (pushed.isDefined) { + ParquetInputFormat.setFilterPredicate(hadoopAttemptContext.getConfiguration, pushed.get) + } + val taskContext = Option(TaskContext.get()) + if (enableVectorizedReader) { + val vectorizedReader = new Spark32HoodieVectorizedParquetRecordReader( + convertTz.orNull, + datetimeRebaseSpec.mode.toString, + datetimeRebaseSpec.timeZone, + int96RebaseSpec.mode.toString, + int96RebaseSpec.timeZone, + enableOffHeapColumnVector && taskContext.isDefined, + capacity, typeChangeInfos) + val iter = new RecordReaderIterator(vectorizedReader) + // SPARK-23457 Register a task completion listener before `initialization`. + // taskContext.foreach(_.addTaskCompletionListener[Unit](_ => iter.close())) + try { + vectorizedReader.initialize(split, hadoopAttemptContext) + logDebug(s"Appending $partitionSchema ${file.partitionValues}") + vectorizedReader.initBatch(partitionSchema, file.partitionValues) + if (returningBatch) { + vectorizedReader.enableReturningBatches() + } + + // UnsafeRowParquetRecordReader appends the columns internally to avoid another copy. + iter.asInstanceOf[Iterator[InternalRow]] + } catch { + case e: Throwable => + // SPARK-23457: In case there is an exception in initialization, close the iterator to + // avoid leaking resources. + iter.close() + throw e + } + } else { + logDebug(s"Falling back to parquet-mr") + // ParquetRecordReader returns InternalRow + val readSupport = new ParquetReadSupport( + convertTz, + enableVectorizedReader = false, + datetimeRebaseSpec, + int96RebaseSpec) + val reader = if (pushed.isDefined && enableRecordFilter) { + val parquetFilter = FilterCompat.get(pushed.get, null) + new ParquetRecordReader[InternalRow](readSupport, parquetFilter) + } else { + new ParquetRecordReader[InternalRow](readSupport) + } + val iter = new RecordReaderIterator[InternalRow](reader) + // SPARK-23457 Register a task completion listener before `initialization`. 
+ taskContext.foreach(_.addTaskCompletionListener[Unit](_ => iter.close())) + reader.initialize(split, hadoopAttemptContext) + + val fullSchema = requiredSchema.toAttributes ++ partitionSchema.toAttributes + val unsafeProjection = if (typeChangeInfos.isEmpty) { + GenerateUnsafeProjection.generate(fullSchema, fullSchema) + } else { + // find type changed. + val newFullSchema = new StructType(requiredSchema.fields.zipWithIndex.map { case (f, i) => + if (typeChangeInfos.containsKey(i)) { + StructField(f.name, typeChangeInfos.get(i).getRight, f.nullable, f.metadata) + } else f + }).toAttributes ++ partitionSchema.toAttributes + val castSchema = newFullSchema.zipWithIndex.map { case (attr, i) => + if (typeChangeInfos.containsKey(i)) { + Cast(attr, typeChangeInfos.get(i).getLeft) + } else attr + } + GenerateUnsafeProjection.generate(castSchema, newFullSchema) + } + + if (partitionSchema.length == 0) { + // There is no partition columns + iter.map(unsafeProjection) + } else { + val joinedRow = new JoinedRow() + iter.map(d => unsafeProjection(joinedRow(d, file.partitionValues))) + } + } + } + } + } +} + +object Spark32HoodieParquetFileFormat { + + private def rebuildFilterFromParquet(oldFilter: Filter, fileSchema: InternalSchema, querySchema: InternalSchema): Filter = { + if (fileSchema == null || querySchema == null) { + oldFilter + } else { + oldFilter match { + case eq: EqualTo => + val newAttribute = InternalSchemaUtils.reBuildFilterName(eq.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else eq.copy(attribute = newAttribute) + case eqs: EqualNullSafe => + val newAttribute = InternalSchemaUtils.reBuildFilterName(eqs.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else eqs.copy(attribute = newAttribute) + case gt: GreaterThan => + val newAttribute = InternalSchemaUtils.reBuildFilterName(gt.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else gt.copy(attribute = newAttribute) + case gtr: 
GreaterThanOrEqual => + val newAttribute = InternalSchemaUtils.reBuildFilterName(gtr.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else gtr.copy(attribute = newAttribute) + case lt: LessThan => + val newAttribute = InternalSchemaUtils.reBuildFilterName(lt.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else lt.copy(attribute = newAttribute) + case lte: LessThanOrEqual => + val newAttribute = InternalSchemaUtils.reBuildFilterName(lte.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else lte.copy(attribute = newAttribute) + case i: In => + val newAttribute = InternalSchemaUtils.reBuildFilterName(i.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else i.copy(attribute = newAttribute) + case isn: IsNull => + val newAttribute = InternalSchemaUtils.reBuildFilterName(isn.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else isn.copy(attribute = newAttribute) + case isnn: IsNotNull => + val newAttribute = InternalSchemaUtils.reBuildFilterName(isnn.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else isnn.copy(attribute = newAttribute) + case And(left, right) => + And(rebuildFilterFromParquet(left, fileSchema, querySchema), rebuildFilterFromParquet(right, fileSchema, querySchema)) + case Or(left, right) => + Or(rebuildFilterFromParquet(left, fileSchema, querySchema), rebuildFilterFromParquet(right, fileSchema, querySchema)) + case Not(child) => + Not(rebuildFilterFromParquet(child, fileSchema, querySchema)) + case ssw: StringStartsWith => + val newAttribute = InternalSchemaUtils.reBuildFilterName(ssw.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else ssw.copy(attribute = newAttribute) + case ses: StringEndsWith => + val newAttribute = InternalSchemaUtils.reBuildFilterName(ses.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else ses.copy(attribute = 
newAttribute) + case sc: StringContains => + val newAttribute = InternalSchemaUtils.reBuildFilterName(sc.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else sc.copy(attribute = newAttribute) + case AlwaysTrue => + AlwaysTrue + case AlwaysFalse => + AlwaysFalse + case _ => + AlwaysTrue + } + } + } +} + diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/ResolveHudiAlterTableCommandSpark32.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/ResolveHudiAlterTableCommandSpark32.scala new file mode 100644 index 0000000000000..96d919cf0a5b5 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/ResolveHudiAlterTableCommandSpark32.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hudi + +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.internal.schema.action.TableChange.ColumnChangeID +import org.apache.spark.sql.catalyst.analysis.ResolvedTable +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.plans.logical.{AddColumns, AlterColumn, DropColumns, LogicalPlan, RenameColumn, ReplaceColumns, SetTableProperties, UnsetTableProperties} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.hudi.catalog.HoodieInternalV2Table +import org.apache.spark.sql.hudi.command.{AlterTableCommand => HudiAlterTableCommand} + +/** + * Rule to mostly resolve, normalize and rewrite column names based on case sensitivity + * for ALTER TABLE column commands. + */ +class ResolveHudiAlterTableCommandSpark32(sparkSession: SparkSession) extends Rule[LogicalPlan] { + + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { + case set @ SetTableProperties(asTable(table), _) if schemaEvolutionEnabled && set.resolved => + HudiAlterTableCommand(table, set.changes, ColumnChangeID.PROPERTY_CHANGE) + case unSet @ UnsetTableProperties(asTable(table), _, _) if schemaEvolutionEnabled && unSet.resolved => + HudiAlterTableCommand(table, unSet.changes, ColumnChangeID.PROPERTY_CHANGE) + case drop @ DropColumns(asTable(table), _) if schemaEvolutionEnabled && drop.resolved => + HudiAlterTableCommand(table, drop.changes, ColumnChangeID.DELETE) + case add @ AddColumns(asTable(table), _) if schemaEvolutionEnabled && add.resolved => + HudiAlterTableCommand(table, add.changes, ColumnChangeID.ADD) + case renameColumn @ RenameColumn(asTable(table), _, _) if schemaEvolutionEnabled && renameColumn.resolved=> + HudiAlterTableCommand(table, renameColumn.changes, ColumnChangeID.UPDATE) + case alter @ AlterColumn(asTable(table), _, _, _, _, _) if schemaEvolutionEnabled && alter.resolved => + 
HudiAlterTableCommand(table, alter.changes, ColumnChangeID.UPDATE) + case replace @ ReplaceColumns(asTable(table), _) if schemaEvolutionEnabled && replace.resolved => + HudiAlterTableCommand(table, replace.changes, ColumnChangeID.REPLACE) + } + + private def schemaEvolutionEnabled(): Boolean = sparkSession + .sessionState.conf.getConfString(HoodieWriteConfig.SCHEMA_EVOLUTION_ENABLE.key(), "false").toBoolean + + object asTable { + def unapply(a: LogicalPlan): Option[CatalogTable] = { + a match { + case ResolvedTable(_, _, table: HoodieInternalV2Table, _) => + table.catalogTable + case _ => + None + } + } + } +} + diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala index 3046af991404b..d9858b69cc081 100644 --- a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala @@ -19,12 +19,10 @@ package org.apache.spark.sql.hudi.catalog import org.apache.hadoop.fs.Path -import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport} -import org.apache.hudi.client.common.HoodieSparkEngineContext -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.exception.HoodieException import org.apache.hudi.hive.util.ConfigUtils import org.apache.hudi.sql.InsertMode +import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport} import org.apache.spark.sql.HoodieSpark3SqlUtils.convertTransforms import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException, UnresolvedAttribute} @@ -34,7 +32,7 @@ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChan import org.apache.spark.sql.connector.catalog._ import 
org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources.DataSource -import org.apache.spark.sql.hudi.command.{AlterHoodieTableAddColumnsCommand, AlterHoodieTableChangeColumnCommand, AlterHoodieTableRenameCommand, CreateHoodieTableCommand} +import org.apache.spark.sql.hudi.command._ import org.apache.spark.sql.hudi.{HoodieSqlCommonUtils, ProvidesHoodieConfig} import org.apache.spark.sql.types.{StructField, StructType} import org.apache.spark.sql.{Dataset, SaveMode, SparkSession, _} @@ -116,21 +114,24 @@ class HoodieCatalog extends DelegatingCatalogExtension override def tableExists(ident: Identifier): Boolean = super.tableExists(ident) - override def dropTable(ident: Identifier): Boolean = super.dropTable(ident) + override def dropTable(ident: Identifier): Boolean = { + val table = loadTable(ident) + table match { + case _: HoodieInternalV2Table => + DropHoodieTableCommand(ident.asTableIdentifier, ifExists = true, isView = false, purge = false).run(spark) + true + case _ => super.dropTable(ident) + } + } override def purgeTable(ident: Identifier): Boolean = { val table = loadTable(ident) table match { - case hoodieTable: HoodieInternalV2Table => - val location = hoodieTable.hoodieCatalogTable.tableLocation - val targetPath = new Path(location) - val engineContext = new HoodieSparkEngineContext(spark.sparkContext) - val fs = FSUtils.getFs(location, spark.sparkContext.hadoopConfiguration) - FSUtils.deleteDir(engineContext, fs, targetPath, spark.sparkContext.defaultParallelism) - super.dropTable(ident) - case _ => + case _: HoodieInternalV2Table => + DropHoodieTableCommand(ident.asTableIdentifier, ifExists = true, isView = false, purge = true).run(spark) + true + case _ => super.purgeTable(ident) } - true } @throws[NoSuchTableException] diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala 
b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala new file mode 100644 index 0000000000000..bca3e7050c792 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala @@ -0,0 +1,347 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hudi.command + +import java.net.URI +import java.nio.charset.StandardCharsets +import java.util +import java.util.concurrent.atomic.AtomicInteger +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.client.utils.SparkInternalSchemaConverter +import org.apache.hudi.common.model.{HoodieCommitMetadata, WriteOperationType} +import org.apache.hudi.{DataSourceOptionsHelper, DataSourceUtils} +import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstant} +import org.apache.hudi.common.table.timeline.HoodieInstant.State +import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hudi.common.util.{CommitUtils, Option} +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.action.TableChange.ColumnChangeID +import org.apache.hudi.internal.schema.action.TableChanges +import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter +import org.apache.hudi.internal.schema.utils.{SchemaChangeUtils, SerDeHelper} +import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager +import org.apache.hudi.table.HoodieSparkTable +import org.apache.spark.api.java.JavaSparkContext +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.connector.catalog.{TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, DeleteColumn, RemoveProperty, SetProperty} +import org.apache.spark.sql.types.StructType + +import scala.collection.JavaConverters._ +import scala.util.control.NonFatal + +case class AlterTableCommand(table: CatalogTable, changes: Seq[TableChange], changeType: ColumnChangeID) extends 
HoodieLeafRunnableCommand with Logging { + override def run(sparkSession: SparkSession): Seq[Row] = { + changeType match { + case ColumnChangeID.ADD => applyAddAction(sparkSession) + case ColumnChangeID.DELETE => applyDeleteAction(sparkSession) + case ColumnChangeID.UPDATE => applyUpdateAction(sparkSession) + case ColumnChangeID.PROPERTY_CHANGE if (changes.filter(_.isInstanceOf[SetProperty]).size == changes.size) => + applyPropertySet(sparkSession) + case ColumnChangeID.PROPERTY_CHANGE if (changes.filter(_.isInstanceOf[RemoveProperty]).size == changes.size) => + applyPropertyUnset(sparkSession) + case ColumnChangeID.REPLACE => applyReplaceAction(sparkSession) + case other => throw new RuntimeException(s"find unsupported alter command type: ${other}") + } + Seq.empty[Row] + } + + def applyReplaceAction(sparkSession: SparkSession): Unit = { + // convert to delete first then add again + val deleteChanges = changes.filter(p => p.isInstanceOf[DeleteColumn]).map(_.asInstanceOf[DeleteColumn]) + val addChanges = changes.filter(p => p.isInstanceOf[AddColumn]).map(_.asInstanceOf[AddColumn]) + val (oldSchema, historySchema) = getInternalSchemaAndHistorySchemaStr(sparkSession) + val newSchema = applyAddAction2Schema(sparkSession, applyDeleteAction2Schema(sparkSession, oldSchema, deleteChanges), addChanges) + val verifiedHistorySchema = if (historySchema == null || historySchema.isEmpty) { + SerDeHelper.inheritSchemas(oldSchema, "") + } else { + historySchema + } + AlterTableCommand.commitWithSchema(newSchema, verifiedHistorySchema, table, sparkSession) + logInfo("column replace finished") + } + + def applyAddAction2Schema(sparkSession: SparkSession, oldSchema: InternalSchema, addChanges: Seq[AddColumn]): InternalSchema = { + val addChange = TableChanges.ColumnAddChange.get(oldSchema) + addChanges.foreach { addColumn => + val names = addColumn.fieldNames() + val parentName = AlterTableCommand.getParentName(names) + // add col change + val colType = 
SparkInternalSchemaConverter.buildTypeFromStructType(addColumn.dataType(), true, new AtomicInteger(0)) + addChange.addColumns(parentName, names.last, colType, addColumn.comment()) + // add position change + addColumn.position() match { + case after: TableChange.After => + addChange.addPositionChange(names.mkString("."), + if (parentName.isEmpty) after.column() else parentName + "." + after.column(), "after") + case _: TableChange.First => + addChange.addPositionChange(names.mkString("."), "", "first") + case _ => + } + } + SchemaChangeUtils.applyTableChanges2Schema(oldSchema, addChange) + } + + def applyDeleteAction2Schema(sparkSession: SparkSession, oldSchema: InternalSchema, deleteChanges: Seq[DeleteColumn]): InternalSchema = { + val deleteChange = TableChanges.ColumnDeleteChange.get(oldSchema) + deleteChanges.foreach { c => + val originalColName = c.fieldNames().mkString(".") + checkSchemaChange(Seq(originalColName), table) + deleteChange.deleteColumn(originalColName) + } + SchemaChangeUtils.applyTableChanges2Schema(oldSchema, deleteChange).setSchemaId(oldSchema.getMaxColumnId) + } + + + def applyAddAction(sparkSession: SparkSession): Unit = { + val (oldSchema, historySchema) = getInternalSchemaAndHistorySchemaStr(sparkSession) + val newSchema = applyAddAction2Schema(sparkSession, oldSchema, changes.map(_.asInstanceOf[AddColumn])) + val verifiedHistorySchema = if (historySchema == null || historySchema.isEmpty) { + SerDeHelper.inheritSchemas(oldSchema, "") + } else { + historySchema + } + AlterTableCommand.commitWithSchema(newSchema, verifiedHistorySchema, table, sparkSession) + logInfo("column add finished") + } + + def applyDeleteAction(sparkSession: SparkSession): Unit = { + val (oldSchema, historySchema) = getInternalSchemaAndHistorySchemaStr(sparkSession) + val newSchema = applyDeleteAction2Schema(sparkSession, oldSchema, changes.map(_.asInstanceOf[DeleteColumn])) + // delete action should not change the getMaxColumnId field. 
+ newSchema.setMaxColumnId(oldSchema.getMaxColumnId) + val verifiedHistorySchema = if (historySchema == null || historySchema.isEmpty) { + SerDeHelper.inheritSchemas(oldSchema, "") + } else { + historySchema + } + AlterTableCommand.commitWithSchema(newSchema, verifiedHistorySchema, table, sparkSession) + logInfo("column delete finished") + } + + def applyUpdateAction(sparkSession: SparkSession): Unit = { + val (oldSchema, historySchema) = getInternalSchemaAndHistorySchemaStr(sparkSession) + val updateChange = TableChanges.ColumnUpdateChange.get(oldSchema) + changes.foreach { change => + change match { + case updateType: TableChange.UpdateColumnType => + val newType = SparkInternalSchemaConverter.buildTypeFromStructType(updateType.newDataType(), true, new AtomicInteger(0)) + updateChange.updateColumnType(updateType.fieldNames().mkString("."), newType) + case updateComment: TableChange.UpdateColumnComment => + updateChange.updateColumnComment(updateComment.fieldNames().mkString("."), updateComment.newComment()) + case updateName: TableChange.RenameColumn => + val originalColName = updateName.fieldNames().mkString(".") + checkSchemaChange(Seq(originalColName), table) + updateChange.renameColumn(originalColName, updateName.newName()) + case updateNullAbility: TableChange.UpdateColumnNullability => + updateChange.updateColumnNullability(updateNullAbility.fieldNames().mkString("."), updateNullAbility.nullable()) + case updatePosition: TableChange.UpdateColumnPosition => + val names = updatePosition.fieldNames() + val parentName = AlterTableCommand.getParentName(names) + updatePosition.position() match { + case after: TableChange.After => + updateChange.addPositionChange(names.mkString("."), + if (parentName.isEmpty) after.column() else parentName + "." 
+ after.column(), "after") + case _: TableChange.First => + updateChange.addPositionChange(names.mkString("."), "", "first") + case _ => + } + } + } + val newSchema = SchemaChangeUtils.applyTableChanges2Schema(oldSchema, updateChange) + val verifiedHistorySchema = if (historySchema == null || historySchema.isEmpty) { + SerDeHelper.inheritSchemas(oldSchema, "") + } else { + historySchema + } + AlterTableCommand.commitWithSchema(newSchema, verifiedHistorySchema, table, sparkSession) + logInfo("column update finished") + } + + // to do support unset default value to columns, and apply them to internalSchema + def applyPropertyUnset(sparkSession: SparkSession): Unit = { + val catalog = sparkSession.sessionState.catalog + val propKeys = changes.map(_.asInstanceOf[RemoveProperty]).map(_.property()) + // ignore NonExist unset + propKeys.foreach { k => + if (!table.properties.contains(k) && k != TableCatalog.PROP_COMMENT) { + logWarning(s"find non exist unset property: ${k} , ignore it") + } + } + val tableComment = if (propKeys.contains(TableCatalog.PROP_COMMENT)) None else table.comment + val newProperties = table.properties.filter { case (k, _) => !propKeys.contains(k) } + val newTable = table.copy(properties = newProperties, comment = tableComment) + catalog.alterTable(newTable) + logInfo("table properties change finished") + } + + // to do support set default value to columns, and apply them to internalSchema + def applyPropertySet(sparkSession: SparkSession): Unit = { + val catalog = sparkSession.sessionState.catalog + val properties = changes.map(_.asInstanceOf[SetProperty]).map(f => f.property -> f.value).toMap + // This overrides old properties and update the comment parameter of CatalogTable + // with the newly added/modified comment since CatalogTable also holds comment as its + // direct property. 
+ val newTable = table.copy( + properties = table.properties ++ properties, + comment = properties.get(TableCatalog.PROP_COMMENT).orElse(table.comment)) + catalog.alterTable(newTable) + logInfo("table properties change finished") + } + + def getInternalSchemaAndHistorySchemaStr(sparkSession: SparkSession): (InternalSchema, String) = { + val path = AlterTableCommand.getTableLocation(table, sparkSession) + val hadoopConf = sparkSession.sessionState.newHadoopConf() + val metaClient = HoodieTableMetaClient.builder().setBasePath(path) + .setConf(hadoopConf).build() + val schemaUtil = new TableSchemaResolver(metaClient) + + val schema = schemaUtil.getTableInternalSchemaFromCommitMetadata().orElse { + AvroInternalSchemaConverter.convert(schemaUtil.getTableAvroSchema) + } + + val historySchemaStr = schemaUtil.getTableHistorySchemaStrFromCommitMetadata.orElse("") + (schema, historySchemaStr) + } + + def checkSchemaChange(colNames: Seq[String], catalogTable: CatalogTable): Unit = { + val primaryKeys = catalogTable.storage.properties.getOrElse("primaryKey", catalogTable.properties.getOrElse("primaryKey", "keyid")).split(",").map(_.trim) + val preCombineKey = Seq(catalogTable.storage.properties.getOrElse("preCombineField", catalogTable.properties.getOrElse("preCombineField", "ts"))).map(_.trim) + val partitionKey = catalogTable.partitionColumnNames.map(_.trim) + val checkNames = primaryKeys ++ preCombineKey ++ partitionKey + colNames.foreach { col => + if (checkNames.contains(col)) { + throw new UnsupportedOperationException("cannot support apply changes for primaryKey/CombineKey/partitionKey") + } + } + } +} + +object AlterTableCommand extends Logging { + + /** + * Generate a commit with the new schema to change the table's schema. + * + * @param internalSchema new schema after change + * @param historySchemaStr history schemas + * @param table The hoodie table. + * @param sparkSession The spark session.
+ */ + def commitWithSchema(internalSchema: InternalSchema, historySchemaStr: String, table: CatalogTable, sparkSession: SparkSession): Unit = { + val schema = AvroInternalSchemaConverter.convert(internalSchema, table.identifier.table) + val path = getTableLocation(table, sparkSession) + val jsc = new JavaSparkContext(sparkSession.sparkContext) + val client = DataSourceUtils.createHoodieClient(jsc, schema.toString, + path, table.identifier.table, parametersWithWriteDefaults(table.storage.properties).asJava) + + val hadoopConf = sparkSession.sessionState.newHadoopConf() + val metaClient = HoodieTableMetaClient.builder().setBasePath(path).setConf(hadoopConf).build() + + val commitActionType = CommitUtils.getCommitActionType(WriteOperationType.ALTER_SCHEMA, metaClient.getTableType) + val instantTime = HoodieActiveTimeline.createNewInstantTime + client.startCommitWithTime(instantTime, commitActionType) + + val hoodieTable = HoodieSparkTable.create(client.getConfig, client.getEngineContext) + val timeLine = hoodieTable.getActiveTimeline + val requested = new HoodieInstant(State.REQUESTED, commitActionType, instantTime) + val metadata = new HoodieCommitMetadata + metadata.setOperationType(WriteOperationType.ALTER_SCHEMA) + timeLine.transitionRequestedToInflight(requested, Option.of(metadata.toJsonString.getBytes(StandardCharsets.UTF_8))) + val extraMeta = new util.HashMap[String, String]() + extraMeta.put(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(internalSchema.setSchemaId(instantTime.toLong))) + val schemaManager = new FileBasedInternalSchemaStorageManager(metaClient) + schemaManager.persistHistorySchemaStr(instantTime, SerDeHelper.inheritSchemas(internalSchema, historySchemaStr)) + client.commit(instantTime, jsc.emptyRDD, Option.of(extraMeta)) + val existRoTable = sparkSession.catalog.tableExists(table.identifier.unquotedString + "_ro") + val existRtTable = sparkSession.catalog.tableExists(table.identifier.unquotedString + "_rt") + try { + 
sparkSession.catalog.refreshTable(table.identifier.unquotedString) + // try to refresh ro/rt table + if (existRoTable) sparkSession.catalog.refreshTable(table.identifier.unquotedString + "_ro") + if (existRtTable) sparkSession.catalog.refreshTable(table.identifier.unquotedString + "_rt") + } catch { + case NonFatal(e) => + log.error(s"Exception when attempting to refresh table ${table.identifier.quotedString}", e) + } + // try to sync to hive + // drop partition field before call alter table + val fullSparkSchema = SparkInternalSchemaConverter.constructSparkSchemaFromInternalSchema(internalSchema) + val dataSparkSchema = new StructType(fullSparkSchema.fields.filter(p => !table.partitionColumnNames.exists(f => sparkSession.sessionState.conf.resolver(f, p.name)))) + alterTableDataSchema(sparkSession, table.identifier.database.getOrElse("default"), table.identifier.table, dataSparkSchema) + if (existRoTable) alterTableDataSchema(sparkSession, table.identifier.database.getOrElse("default"), table.identifier.table + "_ro", dataSparkSchema) + if (existRtTable) alterTableDataSchema(sparkSession, table.identifier.database.getOrElse("default"), table.identifier.table + "_rt", dataSparkSchema) + } + + def alterTableDataSchema(sparkSession: SparkSession, db: String, tableName: String, dataSparkSchema: StructType): Unit = { + sparkSession.sessionState.catalog + .externalCatalog + .alterTableDataSchema(db, tableName, dataSparkSchema) + } + + def getTableLocation(table: CatalogTable, sparkSession: SparkSession): String = { + val uri = if (table.tableType == CatalogTableType.MANAGED) { + Some(sparkSession.sessionState.catalog.defaultTablePath(table.identifier)) + } else { + table.storage.locationUri + } + val conf = sparkSession.sessionState.newHadoopConf() + uri.map(makePathQualified(_, conf)) + .map(removePlaceHolder) + .getOrElse(throw new IllegalArgumentException(s"Missing location for ${table.identifier}")) + } + + private def removePlaceHolder(path: String): String = { + if
(path == null || path.length == 0) { + path + } else if (path.endsWith("-__PLACEHOLDER__")) { + path.substring(0, path.length() - 16) + } else { + path + } + } + + def makePathQualified(path: URI, hadoopConf: Configuration): String = { + val hadoopPath = new Path(path) + val fs = hadoopPath.getFileSystem(hadoopConf) + fs.makeQualified(hadoopPath).toUri.toString + } + + def getParentName(names: Array[String]): String = { + if (names.size > 1) { + names.dropRight(1).mkString(".") + } else "" + } + + def parametersWithWriteDefaults(parameters: Map[String, String]): Map[String, String] = { + Map(OPERATION.key -> OPERATION.defaultValue, + TABLE_TYPE.key -> TABLE_TYPE.defaultValue, + PRECOMBINE_FIELD.key -> PRECOMBINE_FIELD.defaultValue, + HoodieWriteConfig.WRITE_PAYLOAD_CLASS_NAME.key -> HoodieWriteConfig.DEFAULT_WRITE_PAYLOAD_CLASS, + INSERT_DROP_DUPS.key -> INSERT_DROP_DUPS.defaultValue, + ASYNC_COMPACT_ENABLE.key -> ASYNC_COMPACT_ENABLE.defaultValue, + INLINE_CLUSTERING_ENABLE.key -> INLINE_CLUSTERING_ENABLE.defaultValue, + ASYNC_CLUSTERING_ENABLE.key -> ASYNC_CLUSTERING_ENABLE.defaultValue + ) ++ DataSourceOptionsHelper.translateConfigurations(parameters) + } +} + diff --git a/hudi-spark-datasource/pom.xml b/hudi-spark-datasource/pom.xml index a19a603599d5a..2111e59cea6c0 100644 --- a/hudi-spark-datasource/pom.xml +++ b/hudi-spark-datasource/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 diff --git a/hudi-sync/hudi-datahub-sync/pom.xml b/hudi-sync/hudi-datahub-sync/pom.xml new file mode 100644 index 0000000000000..aecc5dc7808f4 --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/pom.xml @@ -0,0 +1,150 @@ + + + + + + hudi + org.apache.hudi + 0.12.0-SNAPSHOT + ../../pom.xml + + + 4.0.0 + + hudi-datahub-sync + jar + + + 0.8.31 + 4.1.5 + + + + + io.acryl + datahub-client + ${datahub.version} + + + + org.apache.httpcomponents + fluent-hc + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpclient + + + 
org.apache.httpcomponents + httpasyncclient + ${httpasync.version} + + + org.apache.httpcomponents + httpcore-nio + ${http.version} + + + + + log4j + log4j + + + + org.apache.parquet + parquet-avro + + + + + org.apache.hudi + hudi-common + ${project.version} + + + org.apache.hudi + hudi-sync-common + ${project.version} + + + + org.junit.jupiter + junit-jupiter-api + test + + + + org.junit.jupiter + junit-jupiter-engine + test + + + + org.junit.vintage + junit-vintage-engine + test + + + + org.junit.jupiter + junit-jupiter-params + test + + + + + + + + src/main/resources + + + + + org.apache.rat + apache-rat-plugin + + + org.apache.maven.plugins + maven-jar-plugin + ${maven-jar-plugin.version} + + + + test-jar + + + + + + org.jacoco + jacoco-maven-plugin + + + + + diff --git a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/DataHubSyncClient.java b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/DataHubSyncClient.java new file mode 100644 index 0000000000000..68569822cc30b --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/DataHubSyncClient.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.sync.datahub; + +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.TableSchemaResolver; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.sync.common.AbstractSyncHoodieClient; +import org.apache.hudi.sync.common.HoodieSyncException; +import org.apache.hudi.sync.datahub.config.DataHubSyncConfig; + +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.data.template.SetMode; +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProperties; +import com.linkedin.schema.ArrayType; +import com.linkedin.schema.BooleanType; +import com.linkedin.schema.BytesType; +import com.linkedin.schema.EnumType; +import com.linkedin.schema.FixedType; +import com.linkedin.schema.MapType; +import com.linkedin.schema.NullType; +import com.linkedin.schema.NumberType; +import com.linkedin.schema.OtherSchema; +import com.linkedin.schema.RecordType; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldArray; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.SchemaMetadata; +import com.linkedin.schema.StringType; +import com.linkedin.schema.UnionType; +import datahub.client.rest.RestEmitter; +import datahub.event.MetadataChangeProposalWrapper; +import org.apache.avro.AvroTypeException; +import org.apache.avro.Schema; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.parquet.schema.MessageType; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class DataHubSyncClient extends AbstractSyncHoodieClient { + + private final HoodieTimeline activeTimeline; + private final DataHubSyncConfig syncConfig; + private final Configuration hadoopConf; + private final DatasetUrn datasetUrn; + + public 
DataHubSyncClient(DataHubSyncConfig syncConfig, Configuration hadoopConf, FileSystem fs) { + super(syncConfig.basePath, syncConfig.assumeDatePartitioning, syncConfig.useFileListingFromMetadata, false, fs); + this.syncConfig = syncConfig; + this.hadoopConf = hadoopConf; + this.datasetUrn = syncConfig.datasetIdentifier.getDatasetUrn(); + this.activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); + } + + @Override + public void createTable(String tableName, + MessageType storageSchema, + String inputFormatClass, + String outputFormatClass, + String serdeClass, + Map serdeProperties, + Map tableProperties) { + throw new UnsupportedOperationException("Not supported: `createTable`"); + } + + @Override + public boolean doesTableExist(String tableName) { + return tableExists(tableName); + } + + @Override + public boolean tableExists(String tableName) { + throw new UnsupportedOperationException("Not supported: `tableExists`"); + } + + @Override + public Option getLastCommitTimeSynced(String tableName) { + throw new UnsupportedOperationException("Not supported: `getLastCommitTimeSynced`"); + } + + @Override + public void updateLastCommitTimeSynced(String tableName) { + updateTableProperties(tableName, Collections.singletonMap(HOODIE_LAST_COMMIT_TIME_SYNC, activeTimeline.lastInstant().get().getTimestamp())); + } + + @Override + public Option getLastReplicatedTime(String tableName) { + throw new UnsupportedOperationException("Not supported: `getLastReplicatedTime`"); + } + + @Override + public void updateLastReplicatedTimeStamp(String tableName, String timeStamp) { + throw new UnsupportedOperationException("Not supported: `updateLastReplicatedTimeStamp`"); + } + + @Override + public void deleteLastReplicatedTimeStamp(String tableName) { + throw new UnsupportedOperationException("Not supported: `deleteLastReplicatedTimeStamp`"); + } + + @Override + public void addPartitionsToTable(String tableName, List partitionsToAdd) { + throw new 
UnsupportedOperationException("Not supported: `addPartitionsToTable`"); + } + + @Override + public void updatePartitionsToTable(String tableName, List changedPartitions) { + throw new UnsupportedOperationException("Not supported: `updatePartitionsToTable`"); + } + + @Override + public void dropPartitions(String tableName, List partitionsToDrop) { + throw new UnsupportedOperationException("Not supported: `dropPartitions`"); + } + + @Override + public void updateTableProperties(String tableName, Map tableProperties) { + MetadataChangeProposalWrapper propertiesChangeProposal = MetadataChangeProposalWrapper.builder() + .entityType("dataset") + .entityUrn(datasetUrn) + .upsert() + .aspect(new DatasetProperties().setCustomProperties(new StringMap(tableProperties))) + .build(); + + try (RestEmitter emitter = syncConfig.getRestEmitter()) { + emitter.emit(propertiesChangeProposal, null).get(); + } catch (Exception e) { + throw new HoodieDataHubSyncException("Fail to change properties for Dataset " + datasetUrn + ": " + tableProperties, e); + } + } + + public void updateTableDefinition(String tableName) { + Schema avroSchema = getAvroSchemaWithoutMetadataFields(metaClient); + List fields = avroSchema.getFields().stream().map(f -> new SchemaField() + .setFieldPath(f.name()) + .setType(toSchemaFieldDataType(f.schema().getType())) + .setDescription(f.doc(), SetMode.IGNORE_NULL) + .setNativeDataType(f.schema().getType().getName())).collect(Collectors.toList()); + + final SchemaMetadata.PlatformSchema platformSchema = new SchemaMetadata.PlatformSchema(); + platformSchema.setOtherSchema(new OtherSchema().setRawSchema(avroSchema.toString())); + MetadataChangeProposalWrapper schemaChangeProposal = MetadataChangeProposalWrapper.builder() + .entityType("dataset") + .entityUrn(datasetUrn) + .upsert() + .aspect(new SchemaMetadata() + .setSchemaName(tableName) + .setVersion(0) + .setHash("") + .setPlatform(datasetUrn.getPlatformEntity()) + .setPlatformSchema(platformSchema) + 
.setFields(new SchemaFieldArray(fields))) + .build(); + + try (RestEmitter emitter = syncConfig.getRestEmitter()) { + emitter.emit(schemaChangeProposal, null).get(); + } catch (Exception e) { + throw new HoodieDataHubSyncException("Fail to change schema for Dataset " + datasetUrn, e); + } + } + + @Override + public Map getTableSchema(String tableName) { + throw new UnsupportedOperationException("Not supported: `getTableSchema`"); + } + + @Override + public void close() { + // no op; + } + + static Schema getAvroSchemaWithoutMetadataFields(HoodieTableMetaClient metaClient) { + try { + return new TableSchemaResolver(metaClient).getTableAvroSchema(true); + } catch (Exception e) { + throw new HoodieSyncException("Failed to read avro schema", e); + } + } + + static SchemaFieldDataType toSchemaFieldDataType(Schema.Type type) { + switch (type) { + case BOOLEAN: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType())); + case INT: + case LONG: + case FLOAT: + case DOUBLE: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType())); + case MAP: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new MapType())); + case ENUM: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new EnumType())); + case NULL: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NullType())); + case ARRAY: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new ArrayType())); + case BYTES: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BytesType())); + case FIXED: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new FixedType())); + case UNION: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new UnionType())); + case RECORD: + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType())); + case STRING: + return new 
SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType())); + default: + throw new AvroTypeException("Unexpected type: " + type.getName()); + } + } +} diff --git a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/DataHubSyncTool.java b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/DataHubSyncTool.java new file mode 100644 index 0000000000000..9633d6b089f12 --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/DataHubSyncTool.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.sync.datahub; + +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.sync.common.AbstractSyncTool; +import org.apache.hudi.sync.datahub.config.DataHubSyncConfig; + +import com.beust.jcommander.JCommander; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; + +/** + * To sync with DataHub via REST APIs. 
+ * + * @Experimental + * @see https://datahubproject.io/ + */ +public class DataHubSyncTool extends AbstractSyncTool { + + private final DataHubSyncConfig config; + + public DataHubSyncTool(TypedProperties props, Configuration conf, FileSystem fs) { + this(new DataHubSyncConfig(props), conf, fs); + } + + public DataHubSyncTool(DataHubSyncConfig config, Configuration conf, FileSystem fs) { + super(config.getProps(), conf, fs); + this.config = config; + } + + /** + * Sync to a DataHub Dataset. + * + * @implNote DataHub sync is an experimental feature, which overwrites the DataHub Dataset's schema + * and last commit time sync'ed upon every invocation. + */ + @Override + public void syncHoodieTable() { + try (DataHubSyncClient syncClient = new DataHubSyncClient(config, conf, fs)) { + syncClient.updateTableDefinition(config.tableName); + syncClient.updateLastCommitTimeSynced(config.tableName); + } + } + + public static void main(String[] args) { + final DataHubSyncConfig cfg = new DataHubSyncConfig(); + JCommander cmd = new JCommander(cfg, null, args); + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + FileSystem fs = FSUtils.getFs(cfg.basePath, new Configuration()); + new DataHubSyncTool(cfg, fs.getConf(), fs).syncHoodieTable(); + } +} diff --git a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/HoodieDataHubSyncException.java b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/HoodieDataHubSyncException.java new file mode 100644 index 0000000000000..6fb4bb4b7a04b --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/HoodieDataHubSyncException.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.sync.datahub; + +import org.apache.hudi.sync.common.HoodieSyncException; + +public class HoodieDataHubSyncException extends HoodieSyncException { + + public HoodieDataHubSyncException(String message) { + super(message); + } + + public HoodieDataHubSyncException(String message, Throwable t) { + super(message, t); + } + +} diff --git a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubEmitterSupplier.java b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubEmitterSupplier.java new file mode 100644 index 0000000000000..ca3baa0fcb751 --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubEmitterSupplier.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.sync.datahub.config; + +import datahub.client.rest.RestEmitter; + +import java.util.function.Supplier; + +/** + * To supply a {@link RestEmitter} to sync with DataHub. + *

    + * Implement this to have full control of the {@link RestEmitter}'s creation. + */ +public interface DataHubEmitterSupplier extends Supplier<RestEmitter> { +} diff --git a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubSyncConfig.java b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubSyncConfig.java new file mode 100644 index 0000000000000..1965b15cffb2a --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubSyncConfig.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.hudi.sync.datahub.config; + +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.sync.common.HoodieSyncConfig; + +import com.beust.jcommander.Parameter; +import datahub.client.rest.RestEmitter; + +public class DataHubSyncConfig extends HoodieSyncConfig { + + public static final ConfigProperty META_SYNC_DATAHUB_DATASET_IDENTIFIER_CLASS = ConfigProperty + .key("hoodie.meta.sync.datahub.dataset.identifier.class") + .defaultValue(HoodieDataHubDatasetIdentifier.class.getName()) + .withDocumentation("Pluggable class to help provide info to identify a DataHub Dataset."); + + public static final ConfigProperty META_SYNC_DATAHUB_EMITTER_SERVER = ConfigProperty + .key("hoodie.meta.sync.datahub.emitter.server") + .noDefaultValue() + .withDocumentation("Server URL of the DataHub instance."); + + public static final ConfigProperty META_SYNC_DATAHUB_EMITTER_TOKEN = ConfigProperty + .key("hoodie.meta.sync.datahub.emitter.token") + .noDefaultValue() + .withDocumentation("Auth token to connect to the DataHub instance."); + + public static final ConfigProperty META_SYNC_DATAHUB_EMITTER_SUPPLIER_CLASS = ConfigProperty + .key("hoodie.meta.sync.datahub.emitter.supplier.class") + .noDefaultValue() + .withDocumentation("Pluggable class to supply a DataHub REST emitter to connect to the DataHub instance. 
This overwrites other emitter configs."); + + @Parameter(names = {"--identifier-class"}, description = "Pluggable class to help provide info to identify a DataHub Dataset.") + public String identifierClass; + + @Parameter(names = {"--emitter-server"}, description = "Server URL of the DataHub instance.") + public String emitterServer; + + @Parameter(names = {"--emitter-token"}, description = "Auth token to connect to the DataHub instance.") + public String emitterToken; + + @Parameter(names = {"--emitter-supplier-class"}, description = "Pluggable class to supply a DataHub REST emitter to connect to the DataHub instance. This overwrites other emitter configs.") + public String emitterSupplierClass; + + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + + public final HoodieDataHubDatasetIdentifier datasetIdentifier; + + public DataHubSyncConfig() { + this(new TypedProperties()); + } + + public DataHubSyncConfig(TypedProperties props) { + super(props); + identifierClass = getStringOrDefault(META_SYNC_DATAHUB_DATASET_IDENTIFIER_CLASS); + emitterServer = getStringOrDefault(META_SYNC_DATAHUB_EMITTER_SERVER, null); + emitterToken = getStringOrDefault(META_SYNC_DATAHUB_EMITTER_TOKEN, null); + emitterSupplierClass = getStringOrDefault(META_SYNC_DATAHUB_EMITTER_SUPPLIER_CLASS, null); + + datasetIdentifier = (HoodieDataHubDatasetIdentifier) ReflectionUtils + .loadClass(identifierClass, new Class[] {TypedProperties.class}, props); + } + + public RestEmitter getRestEmitter() { + if (emitterSupplierClass != null) { + return ((DataHubEmitterSupplier) ReflectionUtils.loadClass(emitterSupplierClass)).get(); + } else if (emitterServer != null) { + return RestEmitter.create(b -> b.server(emitterServer).token(emitterToken)); + } else { + return RestEmitter.createWithDefaults(); + } + } +} diff --git a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/HoodieDataHubDatasetIdentifier.java 
b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/HoodieDataHubDatasetIdentifier.java new file mode 100644 index 0000000000000..e3c1ad486c887 --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/HoodieDataHubDatasetIdentifier.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.sync.datahub.config; + +import org.apache.hudi.common.config.TypedProperties; + +import com.linkedin.common.FabricType; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; + +/** + * Construct and provide the default {@link DatasetUrn} to identify the Dataset on DataHub. + *

    + * Extend this to customize the way of constructing {@link DatasetUrn}. + */ +public class HoodieDataHubDatasetIdentifier { + + public static final String DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME = "hudi"; + + protected final TypedProperties props; + + public HoodieDataHubDatasetIdentifier(TypedProperties props) { + this.props = props; + } + + public DatasetUrn getDatasetUrn() { + DataPlatformUrn dataPlatformUrn = new DataPlatformUrn(DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME); + DataHubSyncConfig config = new DataHubSyncConfig(props); + return new DatasetUrn(dataPlatformUrn, String.format("%s.%s", config.databaseName, config.tableName), FabricType.DEV); + } +} diff --git a/hudi-sync/hudi-datahub-sync/src/test/java/org/apache/hudi/sync/datahub/config/TestDataHubSyncConfig.java b/hudi-sync/hudi-datahub-sync/src/test/java/org/apache/hudi/sync/datahub/config/TestDataHubSyncConfig.java new file mode 100644 index 0000000000000..4fec62da739bb --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/src/test/java/org/apache/hudi/sync/datahub/config/TestDataHubSyncConfig.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.sync.datahub.config; + +import org.apache.hudi.common.config.TypedProperties; + +import com.linkedin.common.FabricType; +import com.linkedin.common.urn.DatasetUrn; +import org.junit.jupiter.api.Test; + +import java.net.URISyntaxException; + +import static org.apache.hudi.sync.datahub.config.DataHubSyncConfig.META_SYNC_DATAHUB_DATASET_IDENTIFIER_CLASS; +import static org.junit.jupiter.api.Assertions.assertEquals; + +class TestDataHubSyncConfig { + + @Test + void testInstantiationWithProps() { + TypedProperties props = new TypedProperties(); + props.setProperty(META_SYNC_DATAHUB_DATASET_IDENTIFIER_CLASS.key(), DummyIdentifier.class.getName()); + DataHubSyncConfig syncConfig = new DataHubSyncConfig(props); + DatasetUrn datasetUrn = syncConfig.datasetIdentifier.getDatasetUrn(); + assertEquals("foo", datasetUrn.getPlatformEntity().getPlatformNameEntity()); + assertEquals("project.database.table", datasetUrn.getDatasetNameEntity()); + assertEquals(FabricType.PROD, datasetUrn.getOriginEntity()); + } + + public static class DummyIdentifier extends HoodieDataHubDatasetIdentifier { + + public DummyIdentifier(TypedProperties props) { + super(props); + } + + @Override + public DatasetUrn getDatasetUrn() { + try { + return DatasetUrn.createFromString("urn:li:dataset:(urn:li:dataPlatform:foo,project.database.table,PROD)"); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + } +} diff --git a/hudi-sync/hudi-datahub-sync/src/test/resources/log4j-surefire-quiet.properties b/hudi-sync/hudi-datahub-sync/src/test/resources/log4j-surefire-quiet.properties new file mode 100644 index 0000000000000..78d6cfe849883 --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/src/test/resources/log4j-surefire-quiet.properties @@ -0,0 +1,29 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### +log4j.rootLogger=ERROR, CONSOLE +log4j.logger.org.apache.hudi=ERROR + +# CONSOLE is set to be a ConsoleAppender. +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +# CONSOLE uses PatternLayout. +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c %x - %m%n +log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter +log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true +log4j.appender.CONSOLE.filter.a.LevelMin=WARN +log4j.appender.CONSOLE.filter.a.LevelMax=FATAL diff --git a/hudi-sync/hudi-datahub-sync/src/test/resources/log4j-surefire.properties b/hudi-sync/hudi-datahub-sync/src/test/resources/log4j-surefire.properties new file mode 100644 index 0000000000000..7914f0a78273b --- /dev/null +++ b/hudi-sync/hudi-datahub-sync/src/test/resources/log4j-surefire.properties @@ -0,0 +1,29 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +### +log4j.rootLogger=WARN, CONSOLE +log4j.logger.org.apache.hudi=INFO + +# A1 is set to be a ConsoleAppender. +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +# A1 uses PatternLayout. +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n +log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter +log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true +log4j.appender.CONSOLE.filter.a.LevelMin=WARN +log4j.appender.CONSOLE.filter.a.LevelMax=FATAL diff --git a/hudi-sync/hudi-dla-sync/pom.xml b/hudi-sync/hudi-dla-sync/pom.xml index afb5717318f99..3770225ef7fcb 100644 --- a/hudi-sync/hudi-dla-sync/pom.xml +++ b/hudi-sync/hudi-dla-sync/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml diff --git a/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/DLASyncTool.java b/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/DLASyncTool.java index 2088d48d8a383..97838d03ed66b 100644 --- a/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/DLASyncTool.java +++ b/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/DLASyncTool.java @@ -114,7 +114,7 @@ private void syncHoodieTable(String tableName, boolean useRealtimeInputFormat) { LOG.info("Trying to sync hoodie table " + tableName + " with base path " + hoodieDLAClient.getBasePath() + " of type " + hoodieDLAClient.getTableType()); // Check if the necessary table exists - boolean tableExists = hoodieDLAClient.doesTableExist(tableName); + boolean 
tableExists = hoodieDLAClient.tableExists(tableName); // Get the parquet schema for this table looking at the latest commit MessageType schema = hoodieDLAClient.getDataSchema(); // Sync schema if needed diff --git a/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/HoodieDLAClient.java b/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/HoodieDLAClient.java index 77d7362fa8166..10869eaf27b64 100644 --- a/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/HoodieDLAClient.java +++ b/hudi-sync/hudi-dla-sync/src/main/java/org/apache/hudi/dla/HoodieDLAClient.java @@ -18,8 +18,6 @@ package org.apache.hudi.dla; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; @@ -31,14 +29,17 @@ import org.apache.hudi.hive.SchemaDifference; import org.apache.hudi.hive.util.HiveSchemaUtil; import org.apache.hudi.sync.common.AbstractSyncHoodieClient; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.schema.MessageType; import java.io.IOException; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.DatabaseMetaData; +import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; @@ -115,7 +116,7 @@ public void createTable(String tableName, MessageType storageSchema, String inpu } public Map getTableSchema(String tableName) { - if (!doesTableExist(tableName)) { + if (!tableExists(tableName)) { throw new IllegalArgumentException( "Failed to get schema for table " + tableName + " does not exist"); } @@ -222,6 +223,11 @@ private void updateDLASQL(String sql) { @Override public boolean doesTableExist(String tableName) { + return tableExists(tableName); + } + + @Override + public boolean tableExists(String tableName) { 
String sql = consutructShowCreateTableSQL(tableName); Statement stmt = null; ResultSet rs = null; @@ -274,6 +280,22 @@ public void updateLastCommitTimeSynced(String tableName) { // TODO : dla do not support update tblproperties, so do nothing. } + @Override + public Option getLastReplicatedTime(String tableName) { + // no op; unsupported + return Option.empty(); + } + + @Override + public void updateLastReplicatedTimeStamp(String tableName, String timeStamp) { + // no op; unsupported + } + + @Override + public void deleteLastReplicatedTimeStamp(String tableName) { + // no op; unsupported + } + @Override public void updatePartitionsToTable(String tableName, List changedPartitions) { if (changedPartitions.isEmpty()) { @@ -288,8 +310,8 @@ public void updatePartitionsToTable(String tableName, List changedPartit } @Override - public void dropPartitionsToTable(String tableName, List partitionsToDrop) { - throw new UnsupportedOperationException("Not support dropPartitionsToTable yet."); + public void dropPartitions(String tableName, List partitionsToDrop) { + throw new UnsupportedOperationException("Not support dropPartitions yet."); } public Map, String> scanTablePartitions(String tableName) { @@ -370,6 +392,7 @@ public void updateTableDefinition(String tableName, SchemaDifference schemaDiff) } } + @Override public void close() { try { if (connection != null) { diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml index 19c1233d371bc..111e66b227563 100644 --- a/hudi-sync/hudi-hive-sync/pom.xml +++ b/hudi-sync/hudi-hive-sync/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/AbstractHiveSyncHoodieClient.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/AbstractHiveSyncHoodieClient.java new file mode 100644 index 0000000000000..f0641b6fc08b4 --- /dev/null +++ 
b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/AbstractHiveSyncHoodieClient.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hive; + +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.table.TableSchemaResolver; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.sync.common.AbstractSyncHoodieClient; +import org.apache.hudi.sync.common.HoodieSyncException; +import org.apache.hudi.sync.common.model.Partition; + +import org.apache.avro.Schema; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.parquet.schema.MessageType; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Base class to sync Hudi tables with Hive based metastores, such as Hive server, HMS or managed Hive services. 
+ */ +public abstract class AbstractHiveSyncHoodieClient extends AbstractSyncHoodieClient { + + protected final HoodieTimeline activeTimeline; + protected final HiveSyncConfig syncConfig; + protected final Configuration hadoopConf; + protected final PartitionValueExtractor partitionValueExtractor; + + public AbstractHiveSyncHoodieClient(HiveSyncConfig syncConfig, Configuration hadoopConf, FileSystem fs) { + super(syncConfig.basePath, syncConfig.assumeDatePartitioning, syncConfig.useFileListingFromMetadata, syncConfig.withOperationField, fs); + this.syncConfig = syncConfig; + this.hadoopConf = hadoopConf; + this.partitionValueExtractor = ReflectionUtils.loadClass(syncConfig.partitionValueExtractorClass); + this.activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); + } + + public HoodieTimeline getActiveTimeline() { + return activeTimeline; + } + + /** + * Iterate over the storage partitions and find if there are any new partitions that need to be added or updated. + * Generate a list of PartitionEvent based on the changes required. 
+ */ + protected List getPartitionEvents(List tablePartitions, List partitionStoragePartitions, boolean isDropPartition) { + Map paths = new HashMap<>(); + for (Partition tablePartition : tablePartitions) { + List hivePartitionValues = tablePartition.getValues(); + String fullTablePartitionPath = + Path.getPathWithoutSchemeAndAuthority(new Path(tablePartition.getStorageLocation())).toUri().getPath(); + paths.put(String.join(", ", hivePartitionValues), fullTablePartitionPath); + } + + List events = new ArrayList<>(); + for (String storagePartition : partitionStoragePartitions) { + Path storagePartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, storagePartition); + String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath(); + // Check if the partition values or if hdfs path is the same + List storagePartitionValues = partitionValueExtractor.extractPartitionValuesInPath(storagePartition); + + if (isDropPartition) { + events.add(PartitionEvent.newPartitionDropEvent(storagePartition)); + } else { + if (!storagePartitionValues.isEmpty()) { + String storageValue = String.join(", ", storagePartitionValues); + if (!paths.containsKey(storageValue)) { + events.add(PartitionEvent.newPartitionAddEvent(storagePartition)); + } else if (!paths.get(storageValue).equals(fullStoragePartitionPath)) { + events.add(PartitionEvent.newPartitionUpdateEvent(storagePartition)); + } + } + } + } + return events; + } + + /** + * Get all partitions for the table in the metastore. + */ + public abstract List getAllPartitions(String tableName); + + /** + * Check if a database already exists in the metastore. + */ + public abstract boolean databaseExists(String databaseName); + + /** + * Create a database in the metastore. + */ + public abstract void createDatabase(String databaseName); + + /** + * Update schema for the table in the metastore. 
+ */ + public abstract void updateTableDefinition(String tableName, MessageType newSchema); + + /* + * APIs below need to be re-worked by modeling field comment in hudi-sync-common, + * instead of relying on Avro or Hive schema class. + */ + + public Schema getAvroSchemaWithoutMetadataFields() { + try { + return new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields(); + } catch (Exception e) { + throw new HoodieSyncException("Failed to read avro schema", e); + } + } + + public abstract List getTableCommentUsingMetastoreClient(String tableName); + + public abstract void updateTableComments(String tableName, List oldSchema, List newSchema); + + public abstract void updateTableComments(String tableName, List oldSchema, Map newComments); + + /* + * APIs above need to be re-worked by modeling field comment in hudi-sync-common, + * instead of relying on Avro or Hive schema class. + */ +} diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java index eb4fc62d40d6d..36dba81a33a27 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java @@ -55,7 +55,7 @@ public class HiveSyncConfig extends HoodieSyncConfig { @Parameter(names = {"--use-jdbc"}, description = "Hive jdbc connect url") public Boolean useJdbc; - @Parameter(names = {"--sync-mode"}, description = "Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql") + @Parameter(names = {"--sync-mode"}, description = "Mode to choose for Hive ops. 
Valid values are hms,glue,jdbc and hiveql") public String syncMode; @Parameter(names = {"--auto-create-database"}, description = "Auto create hive database") diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java index cac70ab5446e4..939fc114c0883 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java @@ -18,14 +18,6 @@ package org.apache.hudi.hive; -import com.beust.jcommander.JCommander; -import org.apache.avro.Schema; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; - import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieFileFormat; @@ -41,7 +33,14 @@ import org.apache.hudi.sync.common.AbstractSyncHoodieClient.PartitionEvent; import org.apache.hudi.sync.common.AbstractSyncHoodieClient.PartitionEvent.PartitionEventType; import org.apache.hudi.sync.common.AbstractSyncTool; +import org.apache.hudi.sync.common.model.Partition; +import com.beust.jcommander.JCommander; +import org.apache.avro.Schema; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.schema.GroupType; @@ -66,35 +65,33 @@ * partitions incrementally (all the partitions modified since the last commit) */ @SuppressWarnings("WeakerAccess") -public class HiveSyncTool extends AbstractSyncTool { +public class HiveSyncTool extends AbstractSyncTool implements AutoCloseable 
{ private static final Logger LOG = LogManager.getLogger(HiveSyncTool.class); public static final String SUFFIX_SNAPSHOT_TABLE = "_rt"; public static final String SUFFIX_READ_OPTIMIZED_TABLE = "_ro"; - protected final HiveSyncConfig hiveSyncConfig; - protected HoodieHiveClient hoodieHiveClient = null; + protected HiveSyncConfig hiveSyncConfig; + protected AbstractHiveSyncHoodieClient hoodieHiveClient; protected String snapshotTableName = null; protected Option roTableName = null; public HiveSyncTool(TypedProperties props, Configuration conf, FileSystem fs) { - super(props, conf, fs); - this.hiveSyncConfig = new HiveSyncConfig(props); - init(hiveSyncConfig, new HiveConf(conf, HiveConf.class)); + this(new HiveSyncConfig(props), new HiveConf(conf, HiveConf.class), fs); } - @Deprecated public HiveSyncTool(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf, FileSystem fs) { super(hiveSyncConfig.getProps(), hiveConf, fs); - this.hiveSyncConfig = hiveSyncConfig; - init(hiveSyncConfig, hiveConf); + // TODO: reconcile the way to set METASTOREURIS + if (StringUtils.isNullOrEmpty(hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname))) { + hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, hiveSyncConfig.metastoreUris); + } + initClient(hiveSyncConfig, hiveConf); + initConfig(hiveSyncConfig); } - private void init(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf) { + protected void initClient(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf) { try { - if (StringUtils.isNullOrEmpty(hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname))) { - hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, hiveSyncConfig.metastoreUris); - } this.hoodieHiveClient = new HoodieHiveClient(hiveSyncConfig, hiveConf, fs); } catch (RuntimeException e) { if (hiveSyncConfig.ignoreExceptions) { @@ -103,12 +100,16 @@ private void init(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf) { throw new HoodieHiveSyncException("Got runtime exception when hive syncing", e); } } + } + private void 
initConfig(HiveSyncConfig hiveSyncConfig) { // Set partitionFields to empty, when the NonPartitionedExtractor is used + // TODO: HiveSyncConfig should be responsible for inferring config value if (NonPartitionedExtractor.class.getName().equals(hiveSyncConfig.partitionValueExtractorClass)) { LOG.warn("Set partitionFields to empty, since the NonPartitionedExtractor is used"); hiveSyncConfig.partitionFields = new ArrayList<>(); } + this.hiveSyncConfig = hiveSyncConfig; if (hoodieHiveClient != null) { switch (hoodieHiveClient.getTableType()) { case COPY_ON_WRITE: @@ -139,9 +140,7 @@ public void syncHoodieTable() { } catch (RuntimeException re) { throw new HoodieException("Got runtime exception when hive syncing " + hiveSyncConfig.tableName, re); } finally { - if (hoodieHiveClient != null) { - hoodieHiveClient.close(); - } + close(); } } @@ -162,6 +161,17 @@ protected void doSync() { } } + @Override + public void close() { + if (hoodieHiveClient != null) { + try { + hoodieHiveClient.close(); + } catch (Exception e) { + throw new HoodieHiveSyncException("Fail to close sync client.", e); + } + } + } + protected void syncHoodieTable(String tableName, boolean useRealtimeInputFormat, boolean readAsOptimized) { LOG.info("Trying to sync hoodie table " + tableName + " with base path " + hoodieHiveClient.getBasePath() @@ -170,7 +180,7 @@ protected void syncHoodieTable(String tableName, boolean useRealtimeInputFormat, // check if the database exists else create it if (hiveSyncConfig.autoCreateDatabase) { try { - if (!hoodieHiveClient.doesDataBaseExist(hiveSyncConfig.databaseName)) { + if (!hoodieHiveClient.databaseExists(hiveSyncConfig.databaseName)) { hoodieHiveClient.createDatabase(hiveSyncConfig.databaseName); } } catch (Exception e) { @@ -178,14 +188,14 @@ protected void syncHoodieTable(String tableName, boolean useRealtimeInputFormat, LOG.warn("Unable to create database", e); } } else { - if (!hoodieHiveClient.doesDataBaseExist(hiveSyncConfig.databaseName)) { + if 
(!hoodieHiveClient.databaseExists(hiveSyncConfig.databaseName)) { LOG.error("Hive database does not exist " + hiveSyncConfig.databaseName); throw new HoodieHiveSyncException("hive database does not exist " + hiveSyncConfig.databaseName); } } // Check if the necessary table exists - boolean tableExists = hoodieHiveClient.doesTableExist(tableName); + boolean tableExists = hoodieHiveClient.tableExists(tableName); // check if isDropPartition boolean isDropPartition = hoodieHiveClient.isDropPartition(); @@ -375,7 +385,7 @@ private Map getSparkSerdeProperties(boolean readAsOptimized) { private boolean syncPartitions(String tableName, List writtenPartitionsSince, boolean isDropPartition) { boolean partitionsChanged; try { - List hivePartitions = hoodieHiveClient.scanTablePartitions(tableName); + List hivePartitions = hoodieHiveClient.getAllPartitions(tableName); List partitionEvents = hoodieHiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, isDropPartition); @@ -394,7 +404,7 @@ private boolean syncPartitions(String tableName, List writtenPartitionsS List dropPartitions = filterPartitions(partitionEvents, PartitionEventType.DROP); if (!dropPartitions.isEmpty()) { LOG.info("Drop Partitions " + dropPartitions); - hoodieHiveClient.dropPartitionsToTable(tableName, dropPartitions); + hoodieHiveClient.dropPartitions(tableName, dropPartitions); } partitionsChanged = !updatePartitions.isEmpty() || !newPartitions.isEmpty() || !dropPartitions.isEmpty(); diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java index 70a88a7aabbd1..539d18a213d70 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java @@ -18,10 +18,7 @@ package org.apache.hudi.hive; -import org.apache.hudi.common.fs.FSUtils; -import 
org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.ImmutablePair; @@ -30,18 +27,14 @@ import org.apache.hudi.hive.ddl.HiveQueryDDLExecutor; import org.apache.hudi.hive.ddl.HiveSyncMode; import org.apache.hudi.hive.ddl.JDBCExecutor; -import org.apache.hudi.hive.util.HiveSchemaUtil; -import org.apache.hudi.sync.common.AbstractSyncHoodieClient; -import org.apache.hudi.sync.common.HoodieSyncException; +import org.apache.hudi.sync.common.model.Partition; import org.apache.avro.Schema; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.log4j.LogManager; @@ -49,7 +42,6 @@ import org.apache.parquet.schema.MessageType; import org.apache.thrift.TException; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Locale; @@ -57,22 +49,19 @@ import java.util.stream.Collectors; import static org.apache.hudi.hadoop.utils.HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP; +import static org.apache.hudi.sync.common.util.TableUtils.tableId; -public class HoodieHiveClient extends AbstractSyncHoodieClient { - - private static final String HOODIE_LAST_COMMIT_TIME_SYNC = "last_commit_time_sync"; - private static final String HIVE_ESCAPE_CHARACTER = HiveSchemaUtil.HIVE_ESCAPE_CHARACTER; +/** + * This class implements logic to sync a Hudi table with either the Hive server or the 
Hive Metastore. + */ +public class HoodieHiveClient extends AbstractHiveSyncHoodieClient { private static final Logger LOG = LogManager.getLogger(HoodieHiveClient.class); - private final PartitionValueExtractor partitionValueExtractor; - private final HoodieTimeline activeTimeline; DDLExecutor ddlExecutor; private IMetaStoreClient client; - private final HiveSyncConfig syncConfig; public HoodieHiveClient(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) { - super(cfg.basePath, cfg.assumeDatePartitioning, cfg.useFileListingFromMetadata, cfg.withOperationField, fs); - this.syncConfig = cfg; + super(cfg, configuration, fs); // Support JDBC, HiveQL and metastore based implementations for backwards compatibility. Future users should // disable jdbc and depend on metastore client for all hive registrations @@ -99,20 +88,6 @@ public HoodieHiveClient(HiveSyncConfig cfg, HiveConf configuration, FileSystem f } catch (Exception e) { throw new HoodieHiveSyncException("Failed to create HiveMetaStoreClient", e); } - - try { - this.partitionValueExtractor = - (PartitionValueExtractor) Class.forName(cfg.partitionValueExtractorClass).newInstance(); - } catch (Exception e) { - throw new HoodieHiveSyncException( - "Failed to initialize PartitionValueExtractor class " + cfg.partitionValueExtractorClass, e); - } - - activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); - } - - public HoodieTimeline getActiveTimeline() { - return activeTimeline; } /** @@ -135,7 +110,7 @@ public void updatePartitionsToTable(String tableName, List changedPartit * Partition path has changed - drop the following partitions. 
*/ @Override - public void dropPartitionsToTable(String tableName, List partitionsToDrop) { + public void dropPartitions(String tableName, List partitionsToDrop) { ddlExecutor.dropPartitionsToTable(tableName, partitionsToDrop); } @@ -159,61 +134,33 @@ public void updateTableProperties(String tableName, Map tablePro } } - /** - * Iterate over the storage partitions and find if there are any new partitions that need to be added or updated. - * Generate a list of PartitionEvent based on the changes required. - */ - List getPartitionEvents(List tablePartitions, List partitionStoragePartitions) { - return getPartitionEvents(tablePartitions, partitionStoragePartitions, false); - } - - /** - * Iterate over the storage partitions and find if there are any new partitions that need to be added or updated. - * Generate a list of PartitionEvent based on the changes required. - */ - List getPartitionEvents(List tablePartitions, List partitionStoragePartitions, boolean isDropPartition) { - Map paths = new HashMap<>(); - for (Partition tablePartition : tablePartitions) { - List hivePartitionValues = tablePartition.getValues(); - String fullTablePartitionPath = - Path.getPathWithoutSchemeAndAuthority(new Path(tablePartition.getSd().getLocation())).toUri().getPath(); - paths.put(String.join(", ", hivePartitionValues), fullTablePartitionPath); - } - - List events = new ArrayList<>(); - for (String storagePartition : partitionStoragePartitions) { - Path storagePartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, storagePartition); - String fullStoragePartitionPath = Path.getPathWithoutSchemeAndAuthority(storagePartitionPath).toUri().getPath(); - // Check if the partition values or if hdfs path is the same - List storagePartitionValues = partitionValueExtractor.extractPartitionValuesInPath(storagePartition); - - if (isDropPartition) { - events.add(PartitionEvent.newPartitionDropEvent(storagePartition)); - } else { - if (!storagePartitionValues.isEmpty()) { - String 
storageValue = String.join(", ", storagePartitionValues); - if (!paths.containsKey(storageValue)) { - events.add(PartitionEvent.newPartitionAddEvent(storagePartition)); - } else if (!paths.get(storageValue).equals(fullStoragePartitionPath)) { - events.add(PartitionEvent.newPartitionUpdateEvent(storagePartition)); - } - } - } - } - return events; - } - /** * Scan table partitions. + * + * @deprecated Use {@link #getAllPartitions} instead. */ - public List scanTablePartitions(String tableName) throws TException { + @Deprecated + public List scanTablePartitions(String tableName) throws TException { return client.listPartitions(syncConfig.databaseName, tableName, (short) -1); } - void updateTableDefinition(String tableName, MessageType newSchema) { + @Override + public void updateTableDefinition(String tableName, MessageType newSchema) { ddlExecutor.updateTableDefinition(tableName, newSchema); } + @Override + public List getAllPartitions(String tableName) { + try { + return client.listPartitions(syncConfig.databaseName, tableName, (short) -1) + .stream() + .map(p -> new Partition(p.getValues(), p.getSd().getLocation())) + .collect(Collectors.toList()); + } catch (TException e) { + throw new HoodieHiveSyncException("Failed to get all partitions for table " + tableId(syncConfig.databaseName, tableName), e); + } + } + @Override public void createTable(String tableName, MessageType storageSchema, String inputFormatClass, String outputFormatClass, String serdeClass, @@ -226,18 +173,21 @@ public void createTable(String tableName, MessageType storageSchema, String inpu */ @Override public Map getTableSchema(String tableName) { - if (!doesTableExist(tableName)) { + if (!tableExists(tableName)) { throw new IllegalArgumentException( "Failed to get schema for table " + tableName + " does not exist"); } return ddlExecutor.getTableSchema(tableName); } - /** - * @return true if the configured table exists - */ + @Deprecated @Override public boolean doesTableExist(String tableName) { 
+ return tableExists(tableName); + } + + @Override + public boolean tableExists(String tableName) { try { return client.tableExists(syncConfig.databaseName, tableName); } catch (TException e) { @@ -245,11 +195,13 @@ public boolean doesTableExist(String tableName) { } } - /** - * @param databaseName - * @return true if the configured database exists - */ + @Deprecated public boolean doesDataBaseExist(String databaseName) { + return databaseExists(databaseName); + } + + @Override + public boolean databaseExists(String databaseName) { try { client.getDatabase(databaseName); return true; @@ -261,6 +213,7 @@ public boolean doesDataBaseExist(String databaseName) { } } + @Override public void createDatabase(String databaseName) { ddlExecutor.createDatabase(databaseName); } @@ -321,6 +274,7 @@ public void deleteLastReplicatedTimeStamp(String tableName) { } } + @Override public void close() { try { ddlExecutor.close(); @@ -333,10 +287,6 @@ public void close() { } } - List getAllTables(String db) throws Exception { - return client.getAllTables(db); - } - @Override public void updateLastCommitTimeSynced(String tableName) { // Set the last commit time from the TBLproperties @@ -352,14 +302,7 @@ public void updateLastCommitTimeSynced(String tableName) { } } - public Schema getAvroSchemaWithoutMetadataFields() { - try { - return new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields(); - } catch (Exception e) { - throw new HoodieSyncException("Failed to read avro schema", e); - } - } - + @Override public List getTableCommentUsingMetastoreClient(String tableName) { try { return client.getSchema(syncConfig.databaseName, tableName); @@ -368,11 +311,13 @@ public List getTableCommentUsingMetastoreClient(String tableName) { } } + @Override public void updateTableComments(String tableName, List oldSchema, List newSchema) { Map newComments = newSchema.stream().collect(Collectors.toMap(field -> field.name().toLowerCase(Locale.ROOT), field -> 
StringUtils.isNullOrEmpty(field.doc()) ? "" : field.doc())); updateTableComments(tableName,oldSchema,newComments); } + @Override public void updateTableComments(String tableName, List oldSchema, Map newComments) { Map oldComments = oldSchema.stream().collect(Collectors.toMap(fieldSchema -> fieldSchema.getName().toLowerCase(Locale.ROOT), fieldSchema -> StringUtils.isNullOrEmpty(fieldSchema.getComment()) ? "" : fieldSchema.getComment())); diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncException.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncException.java index d52ac71aa3f16..f4ece02389195 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncException.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncException.java @@ -20,10 +20,6 @@ public class HoodieHiveSyncException extends RuntimeException { - public HoodieHiveSyncException() { - super(); - } - public HoodieHiveSyncException(String message) { super(message); } @@ -32,11 +28,4 @@ public HoodieHiveSyncException(String message, Throwable t) { super(message, t); } - public HoodieHiveSyncException(Throwable t) { - super(t); - } - - protected static String format(String message, Object... args) { - return String.format(String.valueOf(message), (Object[]) args); - } } diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/DDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/DDLExecutor.java index 8cab505f1465b..7b22e56d4538c 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/DDLExecutor.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/DDLExecutor.java @@ -30,11 +30,12 @@ * There are two main implementations one is QueryBased other is based on HiveMetaStore * QueryBasedDDLExecutor also has two implementations namely HiveQL based and other JDBC based. 
*/ -public interface DDLExecutor { +public interface DDLExecutor extends AutoCloseable { + /** * @param databaseName name of database to be created. */ - public void createDatabase(String databaseName); + void createDatabase(String databaseName); /** * Creates a table with the following properties. @@ -47,9 +48,9 @@ public interface DDLExecutor { * @param serdeProperties * @param tableProperties */ - public void createTable(String tableName, MessageType storageSchema, String inputFormatClass, - String outputFormatClass, String serdeClass, - Map serdeProperties, Map tableProperties); + void createTable(String tableName, MessageType storageSchema, String inputFormatClass, + String outputFormatClass, String serdeClass, + Map serdeProperties, Map tableProperties); /** * Updates the table with the newSchema. @@ -57,7 +58,7 @@ public void createTable(String tableName, MessageType storageSchema, String inpu * @param tableName * @param newSchema */ - public void updateTableDefinition(String tableName, MessageType newSchema); + void updateTableDefinition(String tableName, MessageType newSchema); /** * Fetches tableSchema for a table. @@ -65,7 +66,7 @@ public void createTable(String tableName, MessageType storageSchema, String inpu * @param tableName * @return */ - public Map getTableSchema(String tableName); + Map getTableSchema(String tableName); /** * Adds partition to table. @@ -73,7 +74,7 @@ public void createTable(String tableName, MessageType storageSchema, String inpu * @param tableName * @param partitionsToAdd */ - public void addPartitionsToTable(String tableName, List partitionsToAdd); + void addPartitionsToTable(String tableName, List partitionsToAdd); /** * Updates partitions for a given table. 
@@ -81,7 +82,7 @@ public void createTable(String tableName, MessageType storageSchema, String inpu * @param tableName * @param changedPartitions */ - public void updatePartitionsToTable(String tableName, List changedPartitions); + void updatePartitionsToTable(String tableName, List changedPartitions); /** * Drop partitions for a given table. @@ -89,15 +90,13 @@ public void createTable(String tableName, MessageType storageSchema, String inpu * @param tableName * @param partitionsToDrop */ - public void dropPartitionsToTable(String tableName, List partitionsToDrop); + void dropPartitionsToTable(String tableName, List partitionsToDrop); /** * update table comments * * @param tableName - * @param newSchema + * @param newSchema Map key: field name, Map value: [field type, field comment] */ - public void updateTableComments(String tableName, Map> newSchema); - - public void close(); + void updateTableComments(String tableName, Map> newSchema); } diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java index f2e9905350192..868f59b4fe6c9 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java @@ -238,8 +238,11 @@ public void dropPartitionsToTable(String tableName, List partitionsToDro LOG.info("Drop partitions " + partitionsToDrop.size() + " on " + tableName); try { for (String dropPartition : partitionsToDrop) { - String partitionClause = HivePartitionUtil.getPartitionClauseForDrop(dropPartition, partitionValueExtractor, syncConfig); - client.dropPartition(syncConfig.databaseName, tableName, partitionClause, false); + if (HivePartitionUtil.partitionExists(client, tableName, dropPartition, partitionValueExtractor, syncConfig)) { + String partitionClause = + HivePartitionUtil.getPartitionClauseForDrop(dropPartition, 
partitionValueExtractor, syncConfig); + client.dropPartition(syncConfig.databaseName, tableName, partitionClause, false); + } LOG.info("Drop partition " + dropPartition + " on " + tableName); } } catch (TException e) { diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java index a4debfbab9603..4b8ceec952bb4 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java @@ -137,8 +137,12 @@ public void dropPartitionsToTable(String tableName, List partitionsToDro LOG.info("Drop partitions " + partitionsToDrop.size() + " on " + tableName); try { for (String dropPartition : partitionsToDrop) { - String partitionClause = HivePartitionUtil.getPartitionClauseForDrop(dropPartition, partitionValueExtractor, config); - metaStoreClient.dropPartition(config.databaseName, tableName, partitionClause, false); + if (HivePartitionUtil.partitionExists(metaStoreClient, tableName, dropPartition, partitionValueExtractor, + config)) { + String partitionClause = + HivePartitionUtil.getPartitionClauseForDrop(dropPartition, partitionValueExtractor, config); + metaStoreClient.dropPartition(config.databaseName, tableName, partitionClause, false); + } LOG.info("Drop partition " + dropPartition + " on " + tableName); } } catch (Exception e) { diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveSyncMode.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveSyncMode.java index 7e011538c0ff6..abe044cb114f4 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveSyncMode.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveSyncMode.java @@ -26,6 +26,10 @@ public enum HiveSyncMode { * The HMS mode use the hive meta client to 
sync metadata. */ HMS, + /** + * The GLUE mode use the glue client to sync metadata. + */ + GLUE, /** * The HIVEQL mode execute hive ql to sync metadata. */ diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/GlobalHiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/GlobalHiveSyncTool.java index 51b2a77ae7433..a7d205962e25c 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/GlobalHiveSyncTool.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/GlobalHiveSyncTool.java @@ -55,10 +55,6 @@ protected void syncHoodieTable(String tableName, boolean useRealtimeInputFormat, LOG.info("Sync complete for " + tableName); } - public void close() { - hoodieHiveClient.close(); - } - public Map> getLastReplicatedTimeStampMap() { Map> timeStampMap = new HashMap<>(); Option timeStamp = hoodieHiveClient.getLastReplicatedTime(snapshotTableName); diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HivePartitionUtil.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HivePartitionUtil.java index 27e3a73cee5a9..0258cfc5efa53 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HivePartitionUtil.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HivePartitionUtil.java @@ -18,15 +18,22 @@ package org.apache.hudi.hive.util; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hudi.common.util.PartitionPathEncodeUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.hive.HiveSyncConfig; +import org.apache.hudi.hive.HoodieHiveSyncException; import org.apache.hudi.hive.PartitionValueExtractor; - -import java.util.ArrayList; -import 
java.util.List; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.thrift.TException; public class HivePartitionUtil { + private static final Logger LOG = LogManager.getLogger(HivePartitionUtil.class); /** * Build String, example as year=2021/month=06/day=25 @@ -48,4 +55,19 @@ public static String getPartitionClauseForDrop(String partition, PartitionValueE } return String.join("/", partBuilder); } + + public static Boolean partitionExists(IMetaStoreClient client, String tableName, String partitionPath, + PartitionValueExtractor partitionValueExtractor, HiveSyncConfig config) { + Partition newPartition; + try { + List partitionValues = partitionValueExtractor.extractPartitionValuesInPath(partitionPath); + newPartition = client.getPartition(config.databaseName, tableName, partitionValues); + } catch (NoSuchObjectException ignored) { + newPartition = null; + } catch (TException e) { + LOG.error("Failed to get partition " + partitionPath, e); + throw new HoodieHiveSyncException("Failed to get partition " + partitionPath, e); + } + return newPartition != null; + } } diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java index 0e23615d5dadd..1c2d53ed96ded 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java @@ -138,12 +138,12 @@ public void testBasicSync(boolean useSchemaFromCommitMetadata, String syncMode) HiveTestUtil.createCOWTable(instantTime, 5, useSchemaFromCommitMetadata); reinitHiveSyncClient(); - assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); // Lets do the sync reSyncHiveTable(); - 
assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes"); assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(), hiveClient.getDataSchema().getColumns().size() + 1, @@ -176,9 +176,9 @@ public void testBasicSync(boolean useSchemaFromCommitMetadata, String syncMode) ddlExecutor.runSQL("ALTER TABLE `" + HiveTestUtil.TABLE_NAME + "` PARTITION (`datestr`='2050-01-01') SET LOCATION '/some/new/location'"); - List hivePartitions = hiveClient.scanTablePartitions(HiveTestUtil.TABLE_NAME); + List hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME); List writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.empty()); - List partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince); + List partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, false); assertEquals(1, partitionEvents.size(), "There should be only one partition event"); assertEquals(PartitionEventType.UPDATE, partitionEvents.iterator().next().eventType, "The one partition event must of type UPDATE"); @@ -211,20 +211,20 @@ public void testSyncDataBase(String syncMode) throws Exception { hiveSyncProps.setProperty(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key(), "true"); reinitHiveSyncClient(); assertDoesNotThrow((this::reSyncHiveTable)); - assertTrue(hiveClient.doesDataBaseExist(HiveTestUtil.DB_NAME), + assertTrue(hiveClient.databaseExists(HiveTestUtil.DB_NAME), "DataBases " + HiveTestUtil.DB_NAME + " should exist after sync completes"); // while autoCreateDatabase is false and database exists; hiveSyncProps.setProperty(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key(), "false"); reinitHiveSyncClient(); assertDoesNotThrow((this::reSyncHiveTable)); - assertTrue(hiveClient.doesDataBaseExist(HiveTestUtil.DB_NAME), + 
assertTrue(hiveClient.databaseExists(HiveTestUtil.DB_NAME), "DataBases " + HiveTestUtil.DB_NAME + " should exist after sync completes"); // while autoCreateDatabase is true and database exists; hiveSyncProps.setProperty(HiveSyncConfig.HIVE_AUTO_CREATE_DATABASE.key(), "true"); assertDoesNotThrow((this::reSyncHiveTable)); - assertTrue(hiveClient.doesDataBaseExist(HiveTestUtil.DB_NAME), + assertTrue(hiveClient.databaseExists(HiveTestUtil.DB_NAME), "DataBases " + HiveTestUtil.DB_NAME + " should exist after sync completes"); } @@ -457,8 +457,8 @@ public void testSyncIncremental(String syncMode) throws Exception { reSyncHiveTable(); List writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.of(commitTime1)); assertEquals(1, writtenPartitionsSince.size(), "We should have one partition written after 100 commit"); - List hivePartitions = hiveClient.scanTablePartitions(HiveTestUtil.TABLE_NAME); - List partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince); + List hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME); + List partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, false); assertEquals(1, partitionEvents.size(), "There should be only one partition event"); assertEquals(PartitionEventType.ADD, partitionEvents.iterator().next().eventType, "The one partition event must of type ADD"); @@ -581,11 +581,11 @@ public void testSyncMergeOnRead(boolean useSchemaFromCommitMetadata, String sync String roTableName = HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE; reinitHiveSyncClient(); - assertFalse(hiveClient.doesTableExist(roTableName), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); + assertFalse(hiveClient.tableExists(roTableName), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); // Lets do the sync reSyncHiveTable(); - assertTrue(hiveClient.doesTableExist(roTableName), "Table " + roTableName + " should exist 
after sync completes"); + assertTrue(hiveClient.tableExists(roTableName), "Table " + roTableName + " should exist after sync completes"); if (useSchemaFromCommitMetadata) { assertEquals(hiveClient.getTableSchema(roTableName).size(), @@ -643,14 +643,14 @@ public void testSyncMergeOnReadRT(boolean useSchemaFromCommitMetadata, String sy String snapshotTableName = HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE; HiveTestUtil.createMORTable(instantTime, deltaCommitTime, 5, true, useSchemaFromCommitMetadata); reinitHiveSyncClient(); - assertFalse(hiveClient.doesTableExist(snapshotTableName), + assertFalse(hiveClient.tableExists(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE + " should not exist initially"); // Lets do the sync reSyncHiveTable(); - assertTrue(hiveClient.doesTableExist(snapshotTableName), + assertTrue(hiveClient.tableExists(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE + " should exist after sync completes"); @@ -713,11 +713,11 @@ public void testMultiPartitionKeySync(String syncMode) throws Exception { HiveTestUtil.getCreatedTablesSet().add(HiveTestUtil.DB_NAME + "." 
+ HiveTestUtil.TABLE_NAME); reinitHiveSyncClient(); - assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); // Lets do the sync reSyncHiveTable(); - assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes"); assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(), hiveClient.getDataSchema().getColumns().size() + 3, @@ -736,8 +736,8 @@ public void testMultiPartitionKeySync(String syncMode) throws Exception { reinitHiveSyncClient(); List writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.of(instantTime)); assertEquals(1, writtenPartitionsSince.size(), "We should have one partition written after 100 commit"); - List hivePartitions = hiveClient.scanTablePartitions(HiveTestUtil.TABLE_NAME); - List partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince); + List hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME); + List partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince, false); assertEquals(1, partitionEvents.size(), "There should be only one partition event"); assertEquals(PartitionEventType.ADD, partitionEvents.iterator().next().eventType, "The one partition event must of type ADD"); @@ -755,7 +755,7 @@ public void testMultiPartitionKeySync(String syncMode) throws Exception { reinitHiveSyncClient(); reSyncHiveTable(); - assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes"); assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(), hiveClient.getDataSchema().getColumns().size() + 3, @@ -776,12 +776,12 @@ public 
void testDropPartitionKeySync(String syncMode) throws Exception { HiveTestUtil.createCOWTable(instantTime, 1, true); reinitHiveSyncClient(); - assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); // Lets do the sync reSyncHiveTable(); - assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes"); assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(), hiveClient.getDataSchema().getColumns().size() + 1, @@ -820,11 +820,11 @@ public void testDropPartition(String syncMode) throws Exception { HiveTestUtil.createCOWTable(instantTime, 1, true); reinitHiveSyncClient(); - assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); // Lets do the sync reSyncHiveTable(); - assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes"); assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(), hiveClient.getDataSchema().getColumns().size() + 1, @@ -860,11 +860,11 @@ public void testNonPartitionedSync(String syncMode) throws Exception { HiveTestUtil.getCreatedTablesSet().add(HiveTestUtil.DB_NAME + "." 
+ HiveTestUtil.TABLE_NAME); reinitHiveSyncClient(); - assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); // Lets do the sync reSyncHiveTable(); - assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes"); assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(), hiveClient.getDataSchema().getColumns().size(), @@ -882,13 +882,13 @@ public void testReadSchemaForMOR(String syncMode) throws Exception { HiveTestUtil.createMORTable(commitTime, "", 5, false, true); reinitHiveSyncClient(); - assertFalse(hiveClient.doesTableExist(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE + assertFalse(hiveClient.tableExists(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE + " should not exist initially"); // Lets do the sync reSyncHiveTable(); - assertTrue(hiveClient.doesTableExist(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE + assertTrue(hiveClient.tableExists(snapshotTableName), "Table " + HiveTestUtil.TABLE_NAME + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE + " should exist after sync completes"); // Schema being read from compacted base files @@ -925,7 +925,7 @@ public void testConnectExceptionIgnoreConfigSet() throws IOException, URISyntaxE HiveTestUtil.createCOWTable(instantTime, 5, false); reinitHiveSyncClient(); HoodieHiveClient prevHiveClient = hiveClient; - assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); // Lets do the sync @@ -936,12 +936,12 @@ public void testConnectExceptionIgnoreConfigSet() throws 
IOException, URISyntaxE reSyncHiveTable(); assertNull(hiveClient); - assertFalse(prevHiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), + assertFalse(prevHiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); } private void verifyOldParquetFileTest(HoodieHiveClient hiveClient, String emptyCommitTime) throws Exception { - assertTrue(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes"); + assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync completes"); assertEquals(hiveClient.getTableSchema(HiveTestUtil.TABLE_NAME).size(), hiveClient.getDataSchema().getColumns().size() + 1, "Hive Schema should match the table schema + partition field"); @@ -973,7 +973,7 @@ public void testPickingOlderParquetFileIfLatestIsEmptyCommit(String syncMode) th final String emptyCommitTime = "200"; HiveTestUtil.createCommitFileWithSchema(commitMetadata, emptyCommitTime, true); reinitHiveSyncClient(); - assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); + assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); reinitHiveSyncClient(); reSyncHiveTable(); @@ -1000,7 +1000,7 @@ public void testNotPickingOlderParquetFileWhenLatestCommitReadFails(String syncM reinitHiveSyncClient(); assertFalse( - hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); + hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); HiveSyncTool tool = new HiveSyncTool(hiveSyncProps, getHiveConf(), fileSystem); // now delete the evolved commit instant @@ -1017,7 +1017,7 @@ public void 
testNotPickingOlderParquetFileWhenLatestCommitReadFails(String syncM } // table should not be synced yet - assertFalse(hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist at all"); + assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist at all"); } @ParameterizedTest @@ -1033,7 +1033,7 @@ public void testNotPickingOlderParquetFileWhenLatestCommitReadFailsForExistingTa //HiveTestUtil.createCommitFile(commitMetadata, emptyCommitTime); reinitHiveSyncClient(); assertFalse( - hiveClient.doesTableExist(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); + hiveClient.tableExists(HiveTestUtil.TABLE_NAME), "Table " + HiveTestUtil.TABLE_NAME + " should not exist initially"); reSyncHiveTable(); @@ -1120,7 +1120,7 @@ public void testSyncWithoutDiffs(String syncMode) throws Exception { reinitHiveSyncClient(); reSyncHiveTable(); - assertTrue(hiveClient.doesTableExist(tableName)); + assertTrue(hiveClient.tableExists(tableName)); assertEquals(commitTime1, hiveClient.getLastCommitTimeSynced(tableName).get()); HiveTestUtil.addMORPartitions(0, true, true, true, ZonedDateTime.now().plusDays(2), commitTime1, commitTime2); @@ -1138,7 +1138,7 @@ private void reSyncHiveTable() { private void reinitHiveSyncClient() { hiveSyncTool = new HiveSyncTool(hiveSyncProps, HiveTestUtil.getHiveConf(), fileSystem); - hiveClient = hiveSyncTool.hoodieHiveClient; + hiveClient = (HoodieHiveClient) hiveSyncTool.hoodieHiveClient; } private int getPartitionFieldSize() { diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/TestHiveSyncGlobalCommitTool.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/TestHiveSyncGlobalCommitTool.java deleted file mode 100644 index 980374e0baa4e..0000000000000 --- 
a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/TestHiveSyncGlobalCommitTool.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.hive.testutils; - -import org.apache.hudi.hive.replication.HiveSyncGlobalCommitConfig; -import org.apache.hudi.hive.replication.HiveSyncGlobalCommitTool; - -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.util.Collections; - -import static org.apache.hudi.hadoop.utils.HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP; -import static org.apache.hudi.hive.replication.HiveSyncGlobalCommitConfig.LOCAL_BASE_PATH; -import static org.apache.hudi.hive.replication.HiveSyncGlobalCommitConfig.LOCAL_HIVE_SERVER_JDBC_URLS; -import static org.apache.hudi.hive.replication.HiveSyncGlobalCommitConfig.LOCAL_HIVE_SITE_URI; -import static org.apache.hudi.hive.replication.HiveSyncGlobalCommitConfig.REMOTE_BASE_PATH; -import static org.apache.hudi.hive.replication.HiveSyncGlobalCommitConfig.REMOTE_HIVE_SERVER_JDBC_URLS; -import static 
org.apache.hudi.hive.replication.HiveSyncGlobalCommitConfig.REMOTE_HIVE_SITE_URI; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class TestHiveSyncGlobalCommitTool { - - TestCluster localCluster; - TestCluster remoteCluster; - - private static String DB_NAME = "foo"; - private static String TBL_NAME = "bar"; - - private HiveSyncGlobalCommitConfig getGlobalCommitConfig( - String commitTime, String dbName, String tblName) throws Exception { - HiveSyncGlobalCommitConfig config = new HiveSyncGlobalCommitConfig(); - config.properties.setProperty(LOCAL_HIVE_SITE_URI, localCluster.getHiveSiteXmlLocation()); - config.properties.setProperty(REMOTE_HIVE_SITE_URI, remoteCluster.getHiveSiteXmlLocation()); - config.properties.setProperty(LOCAL_HIVE_SERVER_JDBC_URLS, localCluster.getHiveJdBcUrl()); - config.properties.setProperty(REMOTE_HIVE_SERVER_JDBC_URLS, remoteCluster.getHiveJdBcUrl()); - config.properties.setProperty(LOCAL_BASE_PATH, localCluster.tablePath(dbName, tblName)); - config.properties.setProperty(REMOTE_BASE_PATH, remoteCluster.tablePath(dbName, tblName)); - config.globallyReplicatedTimeStamp = commitTime; - config.hiveUser = System.getProperty("user.name"); - config.hivePass = ""; - config.databaseName = dbName; - config.tableName = tblName; - config.basePath = localCluster.tablePath(dbName, tblName); - config.assumeDatePartitioning = true; - config.usePreApacheInputFormat = false; - config.partitionFields = Collections.singletonList("datestr"); - return config; - } - - private void compareEqualLastReplicatedTimeStamp(HiveSyncGlobalCommitConfig config) throws Exception { - assertEquals(localCluster.getHMSClient().getTable(config.databaseName, config.tableName).getParameters().get(GLOBALLY_CONSISTENT_READ_TIMESTAMP), - 
remoteCluster.getHMSClient().getTable(config.databaseName, config.tableName).getParameters().get(GLOBALLY_CONSISTENT_READ_TIMESTAMP), - "compare replicated timestamps"); - } - - @BeforeEach - public void setUp() throws Exception { - localCluster = new TestCluster(); - localCluster.setup(); - remoteCluster = new TestCluster(); - remoteCluster.setup(); - localCluster.forceCreateDb(DB_NAME); - remoteCluster.forceCreateDb(DB_NAME); - localCluster.dfsCluster.getFileSystem().delete(new Path(localCluster.tablePath(DB_NAME, TBL_NAME)), true); - remoteCluster.dfsCluster.getFileSystem().delete(new Path(remoteCluster.tablePath(DB_NAME, TBL_NAME)), true); - } - - @AfterEach - public void clear() throws Exception { - localCluster.getHMSClient().dropTable(DB_NAME, TBL_NAME); - remoteCluster.getHMSClient().dropTable(DB_NAME, TBL_NAME); - localCluster.shutDown(); - remoteCluster.shutDown(); - } - - @Test - public void testBasicGlobalCommit() throws Exception { - String commitTime = "100"; - localCluster.createCOWTable(commitTime, 5, DB_NAME, TBL_NAME); - // simulate drs - remoteCluster.createCOWTable(commitTime, 5, DB_NAME, TBL_NAME); - HiveSyncGlobalCommitConfig config = getGlobalCommitConfig(commitTime, DB_NAME, TBL_NAME); - HiveSyncGlobalCommitTool tool = new HiveSyncGlobalCommitTool(config); - assertTrue(tool.commit()); - compareEqualLastReplicatedTimeStamp(config); - } - - @Test - public void testBasicRollback() throws Exception { - String commitTime = "100"; - localCluster.createCOWTable(commitTime, 5, DB_NAME, TBL_NAME); - // simulate drs - remoteCluster.createCOWTable(commitTime, 5, DB_NAME, TBL_NAME); - HiveSyncGlobalCommitConfig config = getGlobalCommitConfig(commitTime, DB_NAME, TBL_NAME); - HiveSyncGlobalCommitTool tool = new HiveSyncGlobalCommitTool(config); - assertFalse(localCluster.getHMSClient().tableExists(DB_NAME, TBL_NAME)); - assertFalse(remoteCluster.getHMSClient().tableExists(DB_NAME, TBL_NAME)); - // stop the remote cluster hive server to simulate cluster 
going down - remoteCluster.stopHiveServer2(); - assertFalse(tool.commit()); - assertEquals(commitTime, localCluster.getHMSClient() - .getTable(config.databaseName, config.tableName).getParameters() - .get(GLOBALLY_CONSISTENT_READ_TIMESTAMP)); - assertTrue(tool.rollback()); // do a rollback - assertNotEquals(commitTime, localCluster.getHMSClient() - .getTable(config.databaseName, config.tableName).getParameters() - .get(GLOBALLY_CONSISTENT_READ_TIMESTAMP)); - assertFalse(remoteCluster.getHMSClient().tableExists(DB_NAME, TBL_NAME)); - remoteCluster.startHiveServer2(); - } -} diff --git a/hudi-sync/hudi-sync-common/pom.xml b/hudi-sync/hudi-sync-common/pom.xml index 19a83e757c0d4..142eaf6361205 100644 --- a/hudi-sync/hudi-sync-common/pom.xml +++ b/hudi-sync/hudi-sync-common/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -104,6 +104,14 @@ junit-platform-commons test + + + org.apache.hudi + hudi-common + ${project.version} + test-jar + test + diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncHoodieClient.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncHoodieClient.java index 1815491f1867e..8eec327890ca4 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncHoodieClient.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncHoodieClient.java @@ -18,6 +18,13 @@ package org.apache.hudi.sync.common; +import java.io.Serializable; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; +import java.util.Map; +import java.util.Objects; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hudi.common.engine.HoodieLocalEngineContext; @@ -31,22 +38,16 @@ import org.apache.hudi.common.table.timeline.TimelineUtils; import org.apache.hudi.common.util.Option; import 
org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.metadata.HoodieTableMetadataUtil; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.schema.MessageType; -import java.io.Serializable; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -public abstract class AbstractSyncHoodieClient { +public abstract class AbstractSyncHoodieClient implements AutoCloseable { private static final Logger LOG = LogManager.getLogger(AbstractSyncHoodieClient.class); + public static final String HOODIE_LAST_COMMIT_TIME_SYNC = "last_commit_time_sync"; public static final TypeConverter TYPE_CONVERTOR = new TypeConverter() {}; protected final HoodieTableMetaClient metaClient; @@ -89,17 +90,29 @@ public abstract void createTable(String tableName, MessageType storageSchema, String serdeClass, Map serdeProperties, Map tableProperties); + /** + * @deprecated Use {@link #tableExists} instead. 
+ */ + @Deprecated public abstract boolean doesTableExist(String tableName); + public abstract boolean tableExists(String tableName); + public abstract Option getLastCommitTimeSynced(String tableName); public abstract void updateLastCommitTimeSynced(String tableName); + public abstract Option getLastReplicatedTime(String tableName); + + public abstract void updateLastReplicatedTimeStamp(String tableName, String timeStamp); + + public abstract void deleteLastReplicatedTimeStamp(String tableName); + public abstract void addPartitionsToTable(String tableName, List partitionsToAdd); public abstract void updatePartitionsToTable(String tableName, List changedPartitions); - public abstract void dropPartitionsToTable(String tableName, List partitionsToDrop); + public abstract void dropPartitions(String tableName, List partitionsToDrop); public void updateTableProperties(String tableName, Map tableProperties) {} @@ -156,8 +169,7 @@ public MessageType getDataSchema() { public boolean isDropPartition() { try { - Option hoodieCommitMetadata; - hoodieCommitMetadata = new TableSchemaResolver(metaClient).getLatestCommitMetadata(); + Option hoodieCommitMetadata = HoodieTableMetadataUtil.getLatestCommitMetadata(metaClient); if (hoodieCommitMetadata.isPresent() && WriteOperationType.DELETE_PARTITION.equals(hoodieCommitMetadata.get().getOperationType())) { diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/Partition.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/Partition.java new file mode 100644 index 0000000000000..8e2076f95cb9f --- /dev/null +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/Partition.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.sync.common.model; + +import java.util.List; + +public class Partition { + + private final List values; + + private final String storageLocation; + + public Partition(List values, String storageLocation) { + this.values = values; + this.storageLocation = storageLocation; + } + + public List getValues() { + return values; + } + + public String getStorageLocation() { + return storageLocation; + } +} diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java new file mode 100644 index 0000000000000..3ac238c895ad3 --- /dev/null +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.sync.common.util; + +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.engine.HoodieLocalEngineContext; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.metadata.HoodieMetadataFileSystemView; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.BufferedWriter; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class ManifestFileWriter { + + public static final String MANIFEST_FOLDER_NAME = "manifest"; + public static final String MANIFEST_FILE_NAME = "latest-snapshot.csv"; + private static final Logger LOG = LogManager.getLogger(ManifestFileWriter.class); + + private final HoodieTableMetaClient metaClient; + private final boolean useFileListingFromMetadata; + private final boolean assumeDatePartitioning; + + private ManifestFileWriter(Configuration hadoopConf, String basePath, boolean useFileListingFromMetadata, boolean assumeDatePartitioning) { + this.metaClient = 
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build(); + this.useFileListingFromMetadata = useFileListingFromMetadata; + this.assumeDatePartitioning = assumeDatePartitioning; + } + + /** + * Write all the latest base file names to the manifest file. + */ + public synchronized void writeManifestFile() { + try { + List baseFiles = fetchLatestBaseFilesForAllPartitions(metaClient, useFileListingFromMetadata, assumeDatePartitioning) + .collect(Collectors.toList()); + if (baseFiles.isEmpty()) { + LOG.warn("No base file to generate manifest file."); + return; + } else { + LOG.info("Writing base file names to manifest file: " + baseFiles.size()); + } + final Path manifestFilePath = getManifestFilePath(); + try (FSDataOutputStream outputStream = metaClient.getFs().create(manifestFilePath, true); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8))) { + for (String f : baseFiles) { + writer.write(f); + writer.write("\n"); + } + } + } catch (Exception e) { + throw new HoodieException("Error in writing manifest file.", e); + } + } + + public static Stream fetchLatestBaseFilesForAllPartitions(HoodieTableMetaClient metaClient, + boolean useFileListingFromMetadata, boolean assumeDatePartitioning) { + try { + List partitions = FSUtils.getAllPartitionPaths(new HoodieLocalEngineContext(metaClient.getHadoopConf()), + metaClient.getBasePath(), useFileListingFromMetadata, assumeDatePartitioning); + LOG.info("Retrieve all partitions: " + partitions.size()); + return partitions.parallelStream().flatMap(p -> { + Configuration hadoopConf = metaClient.getHadoopConf(); + HoodieLocalEngineContext engContext = new HoodieLocalEngineContext(hadoopConf); + HoodieMetadataFileSystemView fsView = new HoodieMetadataFileSystemView(engContext, metaClient, + metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(), + 
HoodieMetadataConfig.newBuilder().enable(useFileListingFromMetadata).withAssumeDatePartitioning(assumeDatePartitioning).build()); + return fsView.getLatestBaseFiles(p).map(HoodieBaseFile::getFileName); + }); + } catch (Exception e) { + throw new HoodieException("Error in fetching latest base files.", e); + } + } + + public Path getManifestFolder() { + return new Path(metaClient.getMetaPath(), MANIFEST_FOLDER_NAME); + } + + public Path getManifestFilePath() { + return new Path(getManifestFolder(), MANIFEST_FILE_NAME); + } + + public String getManifestSourceUri() { + return new Path(getManifestFolder(), "*").toUri().toString(); + } + + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for {@link ManifestFileWriter}. + */ + public static class Builder { + + private Configuration conf; + private String basePath; + private boolean useFileListingFromMetadata; + private boolean assumeDatePartitioning; + + public Builder setConf(Configuration conf) { + this.conf = conf; + return this; + } + + public Builder setBasePath(String basePath) { + this.basePath = basePath; + return this; + } + + public Builder setUseFileListingFromMetadata(boolean useFileListingFromMetadata) { + this.useFileListingFromMetadata = useFileListingFromMetadata; + return this; + } + + public Builder setAssumeDatePartitioning(boolean assumeDatePartitioning) { + this.assumeDatePartitioning = assumeDatePartitioning; + return this; + } + + public ManifestFileWriter build() { + ValidationUtils.checkArgument(conf != null, "Configuration needs to be set to init ManifestFileGenerator"); + ValidationUtils.checkArgument(basePath != null, "basePath needs to be set to init ManifestFileGenerator"); + return new ManifestFileWriter(conf, basePath, useFileListingFromMetadata, assumeDatePartitioning); + } + } +} diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/SyncUtilHelpers.java 
b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/SyncUtilHelpers.java index f40e53b80f871..def85c5b805d9 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/SyncUtilHelpers.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/SyncUtilHelpers.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.hudi.sync.common.util; import org.apache.hudi.common.config.TypedProperties; diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/TableUtils.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/TableUtils.java new file mode 100644 index 0000000000000..d392bb64184f2 --- /dev/null +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/TableUtils.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.sync.common.util; + +public final class TableUtils { + + public static String tableId(String database, String table) { + return String.format("%s.%s", database, table); + } +} diff --git a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java new file mode 100644 index 0000000000000..47b60f7232579 --- /dev/null +++ b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.sync.common.util; + +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.common.util.FileIOUtils; + +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.util.stream.IntStream; + +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS; +import static org.apache.hudi.sync.common.util.ManifestFileWriter.fetchLatestBaseFilesForAllPartitions; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestManifestFileWriter extends HoodieCommonTestHarness { + + @BeforeEach + public void setUp() throws IOException { + initMetaClient(); + } + + @Test + public void testMultiLevelPartitionedTable() throws Exception { + // Generate 10 files under each partition + createTestDataForPartitionedTable(metaClient, 10); + ManifestFileWriter manifestFileWriter = ManifestFileWriter.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build(); + assertEquals(30, fetchLatestBaseFilesForAllPartitions(metaClient, false, false).count()); + } + + @Test + public void testCreateManifestFile() throws Exception { + // Generate 3 files under each partition + createTestDataForPartitionedTable(metaClient, 3); + ManifestFileWriter manifestFileWriter = ManifestFileWriter.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build(); + manifestFileWriter.writeManifestFile(); + Path manifestFilePath = manifestFileWriter.getManifestFilePath(); + try (InputStream is = metaClient.getFs().open(manifestFilePath)) { + assertEquals(9, FileIOUtils.readAsUTFStringLines(is).size(), "there should be 9 base 
files in total; 3 per partition."); + } + } + + private static void createTestDataForPartitionedTable(HoodieTableMetaClient metaClient, int numFilesPerPartition) throws Exception { + final String instantTime = "100"; + HoodieTestTable testTable = HoodieTestTable.of(metaClient).addCommit(instantTime); + for (String partition : DEFAULT_PARTITION_PATHS) { + testTable.withPartitionMetaFiles(partition) + .withBaseFilesInPartition(partition, IntStream.range(0, numFilesPerPartition).toArray()); + } + } + + @Test + public void getManifestSourceUri() { + ManifestFileWriter manifestFileWriter = ManifestFileWriter.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build(); + String sourceUri = manifestFileWriter.getManifestSourceUri(); + assertEquals(new Path(basePath, ".hoodie/manifest/*").toUri().toString(), sourceUri); + } +} diff --git a/hudi-sync/pom.xml b/hudi-sync/pom.xml index 776a194168445..0ee145418f5ee 100644 --- a/hudi-sync/pom.xml +++ b/hudi-sync/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 @@ -31,8 +31,9 @@ - hudi-sync-common - hudi-hive-sync - hudi-dla-sync + hudi-datahub-sync + hudi-dla-sync + hudi-hive-sync + hudi-sync-common diff --git a/hudi-timeline-service/pom.xml b/hudi-timeline-service/pom.xml index cb2c643c78741..c360279326c02 100644 --- a/hudi-timeline-service/pom.xml +++ b/hudi-timeline-service/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index 39510537ba2fe..8fafb06d98ddf 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT 4.0.0 @@ -233,12 +233,6 @@ - - org.apache.spark - spark-avro_${scala.binary.version} - provided - - org.apache.spark spark-streaming_${scala.binary.version} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieClusteringJob.java 
b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieClusteringJob.java index 26639628eab1b..f6905f92d9440 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieClusteringJob.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieClusteringJob.java @@ -20,24 +20,19 @@ import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieClusteringException; -import org.apache.hudi.exception.HoodieException; import org.apache.hudi.table.HoodieSparkTable; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; -import org.apache.avro.Schema; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -49,15 +44,15 @@ import java.util.Date; import java.util.List; +import static org.apache.hudi.utilities.UtilHelpers.EXECUTE; +import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE; +import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE_AND_EXECUTE; + public class HoodieClusteringJob { - public static final String EXECUTE = "execute"; - public static final String SCHEDULE = "schedule"; - public static final String SCHEDULE_AND_EXECUTE = "scheduleandexecute"; private static final Logger LOG = LogManager.getLogger(HoodieClusteringJob.class); private final Config cfg; - private transient 
FileSystem fs; - private TypedProperties props; + private final TypedProperties props; private final JavaSparkContext jsc; private final HoodieTableMetaClient metaClient; @@ -83,34 +78,34 @@ public static class Config implements Serializable { @Parameter(names = {"--instant-time", "-it"}, description = "Clustering Instant time, only used when set --mode execute. " + "If the instant time is not provided with --mode execute, " + "the earliest scheduled clustering instant time is used by default. " - + "When set \"--mode scheduleAndExecute\" this instant-time will be ignored.", required = false) + + "When set \"--mode scheduleAndExecute\" this instant-time will be ignored.") public String clusteringInstantTime = null; - @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for hoodie insert", required = false) + @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for hoodie insert") public int parallelism = 1; - @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false) + @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master") public String sparkMaster = null; @Parameter(names = {"--spark-memory", "-sm"}, description = "spark memory to use", required = true) public String sparkMemory = null; - @Parameter(names = {"--retry", "-rt"}, description = "number of retries", required = false) + @Parameter(names = {"--retry", "-rt"}, description = "number of retries") public int retry = 0; @Parameter(names = {"--schedule", "-sc"}, description = "Schedule clustering @desperate soon please use \"--mode schedule\" instead") public Boolean runSchedule = false; @Parameter(names = {"--retry-last-failed-clustering-job", "-rc"}, description = "Take effect when using --mode/-m scheduleAndExecute. 
Set true means " - + "check, rollback and execute last failed clustering plan instead of planing a new clustering job directly.", required = false) + + "check, rollback and execute last failed clustering plan instead of planing a new clustering job directly.") public Boolean retryLastFailedClusteringJob = false; @Parameter(names = {"--mode", "-m"}, description = "Set job mode: Set \"schedule\" means make a cluster plan; " - + "Set \"execute\" means execute a cluster plan at given instant which means --instant-time is needed here; " - + "Set \"scheduleAndExecute\" means make a cluster plan first and execute that plan immediately", required = false) + + "Set \"execute\" means execute a cluster plan at given instant which means --instant-time is needed here; " + + "Set \"scheduleAndExecute\" means make a cluster plan first and execute that plan immediately") public String runningMode = null; @Parameter(names = {"--help", "-h"}, help = true) public Boolean help = false; @Parameter(names = {"--job-max-processing-time-ms", "-jt"}, description = "Take effect when using --mode/-m scheduleAndExecute and --retry-last-failed-clustering-job/-rc true. " - + "If maxProcessingTimeMs passed but clustering job is still unfinished, hoodie would consider this job as failed and relaunch.", required = false) + + "If maxProcessingTimeMs passed but clustering job is still unfinished, hoodie would consider this job as failed and relaunch.") public long maxProcessingTimeMs = 0; @Parameter(names = {"--props"}, description = "path to properties file on localfs or dfs, with configurations for " @@ -119,7 +114,7 @@ public static class Config implements Serializable { @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file " + "(using the CLI parameter \"--props\") can also be passed command line using this parameter. 
This can be repeated", - splitter = IdentitySplitter.class) + splitter = IdentitySplitter.class) public List configs = new ArrayList<>(); } @@ -155,10 +150,9 @@ private static void validateRunningMode(Config cfg) { } public int cluster(int retry) { - this.fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); // need to do validate in case that users call cluster() directly without setting cfg.runningMode validateRunningMode(cfg); - int ret = UtilHelpers.retry(retry, () -> { + return UtilHelpers.retry(retry, () -> { switch (cfg.runningMode.toLowerCase()) { case SCHEDULE: { LOG.info("Running Mode: [" + SCHEDULE + "]; Do schedule"); @@ -183,20 +177,10 @@ public int cluster(int retry) { } } }, "Cluster failed"); - return ret; - } - - private String getSchemaFromLatestInstant() throws Exception { - TableSchemaResolver schemaResolver = new TableSchemaResolver(metaClient); - if (metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants() == 0) { - throw new HoodieException("Cannot run clustering without any completed commits"); - } - Schema schema = schemaResolver.getTableAvroSchema(false); - return schema.toString(); } private int doCluster(JavaSparkContext jsc) throws Exception { - String schemaStr = getSchemaFromLatestInstant(); + String schemaStr = UtilHelpers.getSchemaFromLatestInstant(metaClient); try (SparkRDDWriteClient client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) { if (StringUtils.isNullOrEmpty(cfg.clusteringInstantTime)) { // Instant time is not specified @@ -224,7 +208,7 @@ public Option doSchedule() throws Exception { } private Option doSchedule(JavaSparkContext jsc) throws Exception { - String schemaStr = getSchemaFromLatestInstant(); + String schemaStr = UtilHelpers.getSchemaFromLatestInstant(metaClient); try (SparkRDDWriteClient client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) { return 
doSchedule(client); } @@ -240,7 +224,7 @@ private Option doSchedule(SparkRDDWriteClient clien private int doScheduleAndCluster(JavaSparkContext jsc) throws Exception { LOG.info("Step 1: Do schedule"); - String schemaStr = getSchemaFromLatestInstant(); + String schemaStr = UtilHelpers.getSchemaFromLatestInstant(metaClient); try (SparkRDDWriteClient client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) { Option instantTime = Option.empty(); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java index 0180fa0af1590..ef05bdc03add3 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java @@ -69,7 +69,6 @@ * ``` * spark-submit \ * --class org.apache.hudi.utilities.HoodieDataTableValidator \ - * --packages org.apache.spark:spark-avro_2.11:2.4.4 \ * --master spark://xxxx:7077 \ * --driver-memory 1g \ * --executor-memory 1g \ @@ -85,7 +84,6 @@ * ``` * spark-submit \ * --class org.apache.hudi.utilities.HoodieDataTableValidator \ - * --packages org.apache.spark:spark-avro_2.11:2.4.4 \ * --master spark://xxxx:7077 \ * --driver-memory 1g \ * --executor-memory 1g \ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java new file mode 100644 index 0000000000000..2741e2b98a667 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java @@ -0,0 +1,307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities; + +import org.apache.hudi.avro.model.HoodieIndexCommitMetadata; +import org.apache.hudi.avro.model.HoodieIndexPartitionInfo; +import org.apache.hudi.client.SparkRDDWriteClient; +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieIndexException; +import org.apache.hudi.metadata.MetadataPartitionType; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.api.java.JavaSparkContext; +import org.jetbrains.annotations.TestOnly; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; +import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions; +import static 
org.apache.hudi.utilities.UtilHelpers.EXECUTE; +import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE; +import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE_AND_EXECUTE; + +/** + * A tool to run metadata indexing asynchronously. + *

    + * Example command (assuming indexer.properties contains related index configs, see {@link org.apache.hudi.common.config.HoodieMetadataConfig} for configs): + *

    + * spark-submit \ + * --class org.apache.hudi.utilities.HoodieIndexer \ + * /path/to/hudi/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.11.0-SNAPSHOT.jar \ + * --props /path/to/indexer.properties \ + * --mode scheduleAndExecute \ + * --base-path /tmp/hudi_trips_cow \ + * --table-name hudi_trips_cow \ + * --index-types COLUMN_STATS \ + * --parallelism 1 \ + * --spark-memory 1g + *

    + * A sample indexer.properties file: + *

    + * hoodie.metadata.index.async=true + * hoodie.metadata.index.column.stats.enable=true + * hoodie.metadata.index.check.timeout.seconds=60 + * hoodie.write.concurrency.mode=optimistic_concurrency_control + * hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider + */ +public class HoodieIndexer { + + private static final Logger LOG = LogManager.getLogger(HoodieIndexer.class); + private static final String DROP_INDEX = "dropindex"; + + private final HoodieIndexer.Config cfg; + private TypedProperties props; + private final JavaSparkContext jsc; + private final HoodieTableMetaClient metaClient; + + public HoodieIndexer(JavaSparkContext jsc, HoodieIndexer.Config cfg) { + this.cfg = cfg; + this.jsc = jsc; + this.props = isNullOrEmpty(cfg.propsFilePath) + ? UtilHelpers.buildProperties(cfg.configs) + : readConfigFromFileSystem(jsc, cfg); + this.metaClient = UtilHelpers.createMetaClient(jsc, cfg.basePath, true); + } + + private TypedProperties readConfigFromFileSystem(JavaSparkContext jsc, HoodieIndexer.Config cfg) { + return UtilHelpers.readConfig(jsc.hadoopConfiguration(), new Path(cfg.propsFilePath), cfg.configs) + .getProps(true); + } + + public static class Config implements Serializable { + @Parameter(names = {"--base-path", "-sp"}, description = "Base path for the table", required = true) + public String basePath = null; + @Parameter(names = {"--table-name", "-tn"}, description = "Table name", required = true) + public String tableName = null; + @Parameter(names = {"--instant-time", "-it"}, description = "Indexing Instant time") + public String indexInstantTime = null; + @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for hoodie insert", required = true) + public int parallelism = 1; + @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master") + public String sparkMaster = null; + @Parameter(names = {"--spark-memory", "-sm"}, description = "spark memory to use", required = true) + 
public String sparkMemory = null; + @Parameter(names = {"--retry", "-rt"}, description = "number of retries") + public int retry = 0; + @Parameter(names = {"--index-types", "-ixt"}, description = "Comma-separated index types to be built, e.g. BLOOM_FILTERS,COLUMN_STATS", required = true) + public String indexTypes = null; + @Parameter(names = {"--mode", "-m"}, description = "Set job mode: Set \"schedule\" to generate an indexing plan; " + + "Set \"execute\" to execute the indexing plan at the given instant, which means --instant-time is required here; " + + "Set \"scheduleandExecute\" to generate an indexing plan first and execute that plan immediately;" + + "Set \"dropindex\" to drop the index types specified in --index-types;") + public String runningMode = null; + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + + @Parameter(names = {"--props"}, description = "path to properties file on localfs or dfs, with configurations for hoodie client for indexing") + public String propsFilePath = null; + + @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file " + + "(using the CLI parameter \"--props\") can also be passed command line using this parameter. 
This can be repeated", + splitter = IdentitySplitter.class) + public List configs = new ArrayList<>(); + } + + public static void main(String[] args) { + final HoodieIndexer.Config cfg = new HoodieIndexer.Config(); + JCommander cmd = new JCommander(cfg, null, args); + + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + + final JavaSparkContext jsc = UtilHelpers.buildSparkContext("indexing-" + cfg.tableName, cfg.sparkMaster, cfg.sparkMemory); + HoodieIndexer indexer = new HoodieIndexer(jsc, cfg); + int result = indexer.start(cfg.retry); + String resultMsg = String.format("Indexing with basePath: %s, tableName: %s, runningMode: %s", + cfg.basePath, cfg.tableName, cfg.runningMode); + if (result == -1) { + LOG.error(resultMsg + " failed"); + } else { + LOG.info(resultMsg + " success"); + } + jsc.stop(); + } + + public int start(int retry) { + // indexing should be done only if metadata is enabled + if (!props.getBoolean(HoodieMetadataConfig.ENABLE.key())) { + LOG.error(String.format("Metadata is not enabled. Please set %s to true.", HoodieMetadataConfig.ENABLE.key())); + return -1; + } + + return UtilHelpers.retry(retry, () -> { + switch (cfg.runningMode.toLowerCase()) { + case SCHEDULE: { + LOG.info("Running Mode: [" + SCHEDULE + "]; Do schedule"); + Option instantTime = scheduleIndexing(jsc); + int result = instantTime.isPresent() ? 
0 : -1; + if (result == 0) { + LOG.info("The schedule instant time is " + instantTime.get()); + } + return result; + } + case SCHEDULE_AND_EXECUTE: { + LOG.info("Running Mode: [" + SCHEDULE_AND_EXECUTE + "]"); + return scheduleAndRunIndexing(jsc); + } + case EXECUTE: { + LOG.info("Running Mode: [" + EXECUTE + "];"); + return runIndexing(jsc); + } + case DROP_INDEX: { + LOG.info("Running Mode: [" + DROP_INDEX + "];"); + return dropIndex(jsc); + } + default: { + LOG.info("Unsupported running mode [" + cfg.runningMode + "], quit the job directly"); + return -1; + } + } + }, "Indexer failed"); + } + + @TestOnly + public Option doSchedule() throws Exception { + return this.scheduleIndexing(jsc); + } + + private Option scheduleIndexing(JavaSparkContext jsc) throws Exception { + String schemaStr = UtilHelpers.getSchemaFromLatestInstant(metaClient); + try (SparkRDDWriteClient client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) { + return doSchedule(client); + } + } + + private Option doSchedule(SparkRDDWriteClient client) { + List partitionTypes = getRequestedPartitionTypes(cfg.indexTypes); + checkArgument(partitionTypes.size() == 1, "Currently, only one index type can be scheduled at a time."); + if (indexExists(partitionTypes)) { + return Option.empty(); + } + Option indexingInstant = client.scheduleIndexing(partitionTypes); + if (!indexingInstant.isPresent()) { + LOG.error("Scheduling of index action did not return any instant."); + } + return indexingInstant; + } + + private boolean indexExists(List partitionTypes) { + Set indexedMetadataPartitions = getCompletedMetadataPartitions(metaClient.getTableConfig()); + Set requestedIndexPartitionPaths = partitionTypes.stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet()); + requestedIndexPartitionPaths.retainAll(indexedMetadataPartitions); + if (!requestedIndexPartitionPaths.isEmpty()) { + LOG.error("Following indexes already built: " + 
requestedIndexPartitionPaths); + return true; + } + return false; + } + + private int runIndexing(JavaSparkContext jsc) throws Exception { + String schemaStr = UtilHelpers.getSchemaFromLatestInstant(metaClient); + try (SparkRDDWriteClient client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) { + if (isNullOrEmpty(cfg.indexInstantTime)) { + // Instant time is not specified + // Find the earliest scheduled indexing instant for execution + Option earliestPendingIndexInstant = metaClient.getActiveTimeline() + .filterPendingIndexTimeline() + .firstInstant(); + if (earliestPendingIndexInstant.isPresent()) { + cfg.indexInstantTime = earliestPendingIndexInstant.get().getTimestamp(); + LOG.info("Found the earliest scheduled indexing instant which will be executed: " + + cfg.indexInstantTime); + } else { + throw new HoodieIndexException("There is no scheduled indexing in the table."); + } + } + return handleResponse(client.index(cfg.indexInstantTime)) ? 0 : 1; + } + } + + private int scheduleAndRunIndexing(JavaSparkContext jsc) throws Exception { + String schemaStr = UtilHelpers.getSchemaFromLatestInstant(metaClient); + try (SparkRDDWriteClient client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) { + Option indexingInstantTime = doSchedule(client); + if (indexingInstantTime.isPresent()) { + return handleResponse(client.index(indexingInstantTime.get())) ? 0 : 1; + } else { + return -1; + } + } + } + + private int dropIndex(JavaSparkContext jsc) throws Exception { + List partitionTypes = getRequestedPartitionTypes(cfg.indexTypes); + String schemaStr = UtilHelpers.getSchemaFromLatestInstant(metaClient); + try (SparkRDDWriteClient client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) { + client.dropIndex(partitionTypes); + return 0; + } catch (Exception e) { + LOG.error("Failed to drop index. 
", e); + return -1; + } + } + + private boolean handleResponse(Option commitMetadata) { + if (!commitMetadata.isPresent()) { + LOG.error("Indexing failed as no commit metadata present."); + return false; + } + List indexPartitionInfos = commitMetadata.get().getIndexPartitionInfos(); + LOG.info(String.format("Indexing complete for partitions: %s", + indexPartitionInfos.stream().map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toList()))); + return isIndexBuiltForAllRequestedTypes(indexPartitionInfos); + } + + boolean isIndexBuiltForAllRequestedTypes(List indexPartitionInfos) { + Set indexedPartitions = indexPartitionInfos.stream() + .map(HoodieIndexPartitionInfo::getMetadataPartitionPath).collect(Collectors.toSet()); + Set requestedPartitions = getRequestedPartitionTypes(cfg.indexTypes).stream() + .map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet()); + requestedPartitions.removeAll(indexedPartitions); + return requestedPartitions.isEmpty(); + } + + List getRequestedPartitionTypes(String indexTypes) { + List requestedIndexTypes = Arrays.asList(indexTypes.split(",")); + return requestedIndexTypes.stream() + .map(p -> MetadataPartitionType.valueOf(p.toUpperCase(Locale.ROOT))) + // FILES partition is initialized synchronously while getting metadata writer + .filter(p -> !MetadataPartitionType.FILES.equals(p)) + .collect(Collectors.toList()); + } +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java index 832d942c86afc..a693bb4c65e47 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java @@ -32,16 +32,22 @@ import org.apache.hudi.common.model.HoodieColumnRangeMetadata; import org.apache.hudi.common.model.HoodieFileFormat; import 
org.apache.hudi.common.model.HoodieFileGroup; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.TableSchemaResolver; +import org.apache.hudi.common.table.log.HoodieLogFormat; +import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.view.FileSystemViewManager; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.util.CleanerUtils; +import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ParquetUtils; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; @@ -49,15 +55,19 @@ import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.metadata.HoodieTableMetadataUtil; import org.apache.hudi.utilities.util.BloomFilterData; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; import jline.internal.Log; +import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; @@ -67,6 +77,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import 
java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -75,6 +86,8 @@ import java.util.concurrent.Executors; import java.util.stream.Collectors; +import static org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType.INSTANT_TIME; + /** * A validator with spark-submit to compare information, such as partitions, file listing, index, etc., * between metadata table and filesystem. @@ -85,6 +98,9 @@ * - `--validate-all-file-groups`: validate all file groups, and all file slices within file groups. * - `--validate-all-column-stats`: validate column stats for all columns in the schema * - `--validate-bloom-filters`: validate bloom filters of base files + * + * If the Hudi table is on the local file system, the base path passed to `--base-path` must have + * "file:" prefix to avoid validation failure. *

    * - Default : This validator will compare the results between metadata table and filesystem only once. *

    @@ -92,7 +108,6 @@ * ``` * spark-submit \ * --class org.apache.hudi.utilities.HoodieMetadataTableValidator \ - * --packages org.apache.spark:spark-avro_2.11:2.4.4 \ * --master spark://xxxx:7077 \ * --driver-memory 1g \ * --executor-memory 1g \ @@ -111,7 +126,6 @@ * ``` * spark-submit \ * --class org.apache.hudi.utilities.HoodieMetadataTableValidator \ - * --packages org.apache.spark:spark-avro_2.11:2.4.4 \ * --master spark://xxxx:7077 \ * --driver-memory 1g \ * --executor-memory 1g \ @@ -140,8 +154,11 @@ public class HoodieMetadataTableValidator implements Serializable { protected transient Option asyncMetadataTableValidateService; + private final String taskLabels; + public HoodieMetadataTableValidator(HoodieTableMetaClient metaClient) { this.metaClient = metaClient; + this.taskLabels = StringUtils.EMPTY_STRING; } public HoodieMetadataTableValidator(JavaSparkContext jsc, Config cfg) { @@ -158,6 +175,27 @@ public HoodieMetadataTableValidator(JavaSparkContext jsc, Config cfg) { .build(); this.asyncMetadataTableValidateService = cfg.continuous ? Option.of(new AsyncMetadataTableValidateService()) : Option.empty(); + this.taskLabels = generateValidationTaskLabels(); + } + + private String generateValidationTaskLabels() { + List labelList = new ArrayList<>(); + if (cfg.validateLatestBaseFiles) { + labelList.add("validate-latest-base-files"); + } + if (cfg.validateLatestFileSlices) { + labelList.add("validate-latest-file-slices"); + } + if (cfg.validateAllFileGroups) { + labelList.add("validate-all-file-groups"); + } + if (cfg.validateAllColumnStats) { + labelList.add("validate-all-column-stats"); + } + if (cfg.validateBloomFilters) { + labelList.add("validate-bloom-filters"); + } + return String.join(",", labelList); } /** @@ -359,6 +397,9 @@ public void doMetadataTableValidation() { String basePath = metaClient.getBasePath(); Set baseFilesForCleaning = Collections.emptySet(); + // check metadata table is available to read. 
+ checkMetadataTableIsAvailable(); + if (cfg.skipDataFilesForCleaning) { HoodieTimeline inflightCleaningTimeline = metaClient.getActiveTimeline().getCleanerTimeline().filterInflights(); @@ -395,10 +436,12 @@ public void doMetadataTableValidation() { List result = engineContext.parallelize(allPartitions, allPartitions.size()).map(partitionPath -> { try { validateFilesInPartition(metadataTableBasedContext, fsBasedContext, partitionPath, finalBaseFilesForCleaning); - LOG.info("Metadata table validation succeeded for " + partitionPath); + LOG.info(String.format("Metadata table validation succeeded for partition %s (partition %s)", partitionPath, taskLabels)); return true; } catch (HoodieValidationException e) { - LOG.error("Metadata table validation failed for " + partitionPath + " due to HoodieValidationException", e); + LOG.error( + String.format("Metadata table validation failed for partition %s due to HoodieValidationException (partition %s)", + partitionPath, taskLabels), e); if (!cfg.ignoreFailed) { throw e; } @@ -411,9 +454,28 @@ public void doMetadataTableValidation() { } if (finalResult) { - LOG.info("Metadata table validation succeeded."); + LOG.info(String.format("Metadata table validation succeeded (%s).", taskLabels)); } else { - LOG.warn("Metadata table validation failed."); + LOG.warn(String.format("Metadata table validation failed (%s).", taskLabels)); + } + } + + /** + * Check metadata is initialized and available to ready. + * If not we will log.warn and skip current validation. 
+ */ + private void checkMetadataTableIsAvailable() { + try { + HoodieTableMetaClient mdtMetaClient = HoodieTableMetaClient.builder() + .setConf(jsc.hadoopConfiguration()).setBasePath(new Path(cfg.basePath, HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH).toString()) + .setLoadActiveTimelineOnLoad(true) + .build(); + int finishedInstants = mdtMetaClient.getActiveTimeline().filterCompletedInstants().countInstants(); + if (finishedInstants == 0) { + throw new HoodieValidationException("There is no completed instant for metadata table."); + } + } catch (Exception ex) { + LOG.warn("Metadata table is not available to ready for now, ", ex); } } @@ -423,6 +485,21 @@ public void doMetadataTableValidation() { private List validatePartitions(HoodieSparkEngineContext engineContext, String basePath) { // compare partitions List allPartitionPathsFromFS = FSUtils.getAllPartitionPaths(engineContext, basePath, false, cfg.assumeDatePartitioning); + HoodieTimeline completedTimeline = metaClient.getActiveTimeline().filterCompletedInstants(); + + // ignore partitions created by uncommitted ingestion. + allPartitionPathsFromFS = allPartitionPathsFromFS.stream().parallel().filter(part -> { + HoodiePartitionMetadata hoodiePartitionMetadata = new HoodiePartitionMetadata(metaClient.getFs(), new Path(basePath, part)); + + Option instantOption = hoodiePartitionMetadata.readPartitionCreatedCommitTime(); + if (instantOption.isPresent()) { + String instantTime = instantOption.get(); + return completedTimeline.containsOrBeforeTimelineStarts(instantTime); + } else { + return false; + } + }).collect(Collectors.toList()); + List allPartitionPathsMeta = FSUtils.getAllPartitionPaths(engineContext, basePath, true, cfg.assumeDatePartitioning); Collections.sort(allPartitionPathsFromFS); @@ -511,9 +588,9 @@ private void validateAllFileGroups( LOG.debug("All file slices from metadata: " + allFileSlicesFromMeta + ". 
For partitions " + partitionPath); LOG.debug("All file slices from direct listing: " + allFileSlicesFromFS + ". For partitions " + partitionPath); - validate(allFileSlicesFromMeta, allFileSlicesFromFS, partitionPath, "file slices"); - - LOG.info("Validation of all file groups succeeded for partition " + partitionPath); + validateFileSlices( + allFileSlicesFromMeta, allFileSlicesFromFS, partitionPath, + fsBasedContext.getMetaClient(), "all file groups"); } /** @@ -538,16 +615,8 @@ private void validateLatestBaseFiles( LOG.debug("Latest base file from metadata: " + latestFilesFromMetadata + ". For partitions " + partitionPath); LOG.debug("Latest base file from direct listing: " + latestFilesFromFS + ". For partitions " + partitionPath); - if (latestFilesFromMetadata.size() != latestFilesFromFS.size() - || !latestFilesFromMetadata.equals(latestFilesFromFS)) { - String message = "Validation of metadata get latest base file for partition " + partitionPath + " failed. " - + "Latest base file from metadata: " + latestFilesFromMetadata - + "Latest base file from direct listing: " + latestFilesFromFS; - LOG.error(message); - throw new HoodieValidationException(message); - } else { - LOG.info("Validation of getLatestBaseFiles succeeded for partition " + partitionPath); - } + + validate(latestFilesFromMetadata, latestFilesFromFS, partitionPath, "latest base files"); } /** @@ -572,8 +641,9 @@ private void validateLatestFileSlices( LOG.debug("Latest file list from metadata: " + latestFileSlicesFromMetadataTable + ". For partition " + partitionPath); LOG.debug("Latest file list from direct listing: " + latestFileSlicesFromFS + ". 
For partition " + partitionPath); - validate(latestFileSlicesFromMetadataTable, latestFileSlicesFromFS, partitionPath, "file slices"); - LOG.info("Validation of getLatestFileSlices succeeded for partition " + partitionPath); + validateFileSlices( + latestFileSlicesFromMetadataTable, latestFileSlicesFromFS, partitionPath, + fsBasedContext.getMetaClient(), "latest file slices"); } private List filterFileSliceBasedOnInflightCleaning(List sortedLatestFileSliceList, Set baseDataFilesForCleaning) { @@ -594,6 +664,7 @@ private List filterBaseFileBasedOnInflightCleaning(List baseDataFilesForCleaning) { List latestBaseFilenameList = getLatestBaseFileNames(fsBasedContext, partitionPath, baseDataFilesForCleaning); - List> metadataBasedColStats = metadataTableBasedContext + List> metadataBasedColStats = metadataTableBasedContext .getSortedColumnStatsList(partitionPath, latestBaseFilenameList); - List> fsBasedColStats = fsBasedContext + List> fsBasedColStats = fsBasedContext .getSortedColumnStatsList(partitionPath, latestBaseFilenameList); validate(metadataBasedColStats, fsBasedColStats, partitionPath, "column stats"); - - LOG.info("Validation of column stats succeeded for partition " + partitionPath); } private void validateBloomFilters( @@ -624,8 +693,6 @@ private void validateBloomFilters( .getSortedBloomFilterList(partitionPath, latestBaseFilenameList); validate(metadataBasedBloomFilters, fsBasedBloomFilters, partitionPath, "bloom filters"); - - LOG.info("Validation of bloom filters succeeded for partition " + partitionPath); } private List getLatestBaseFileNames(HoodieMetadataValidationContext fsBasedContext, String partitionPath, Set baseDataFilesForCleaning) { @@ -655,6 +722,121 @@ private void validate( } } + private void validateFileSlices( + List fileSliceListFromMetadataTable, List fileSliceListFromFS, + String partitionPath, HoodieTableMetaClient metaClient, String label) { + boolean mismatch = false; + if (fileSliceListFromMetadataTable.size() != 
fileSliceListFromFS.size()) { + mismatch = true; + } else if (!fileSliceListFromMetadataTable.equals(fileSliceListFromFS)) { + for (int i = 0; i < fileSliceListFromMetadataTable.size(); i++) { + FileSlice fileSlice1 = fileSliceListFromMetadataTable.get(i); + FileSlice fileSlice2 = fileSliceListFromFS.get(i); + if (!Objects.equals(fileSlice1.getFileGroupId(), fileSlice2.getFileGroupId()) + || !Objects.equals(fileSlice1.getBaseInstantTime(), fileSlice2.getBaseInstantTime()) + || !Objects.equals(fileSlice1.getBaseFile(), fileSlice2.getBaseFile())) { + mismatch = true; + break; + } + if (!areFileSliceCommittedLogFilesMatching(fileSlice1, fileSlice2, metaClient)) { + mismatch = true; + break; + } else { + LOG.warn(String.format("There are uncommitted log files in the latest file slices " + + "but the committed log files match: %s %s", fileSlice1, fileSlice2)); + } + } + } + + if (mismatch) { + String message = String.format("Validation of %s for partition %s failed." + + "\n%s from metadata: %s\n%s from file system and base files: %s", + label, partitionPath, label, fileSliceListFromMetadataTable, label, fileSliceListFromFS); + LOG.error(message); + throw new HoodieValidationException(message); + } else { + LOG.info(String.format("Validation of %s succeeded for partition %s", label, partitionPath)); + } + } + + /** + * Compares committed log files from two file slices. + * + * @param fs1 File slice 1 + * @param fs2 File slice 2 + * @param metaClient {@link HoodieTableMetaClient} instance + * @return {@code true} if matching; {@code false} otherwise. 
+ */ + private boolean areFileSliceCommittedLogFilesMatching( + FileSlice fs1, FileSlice fs2, HoodieTableMetaClient metaClient) { + Set fs1LogPathSet = + fs1.getLogFiles().map(f -> f.getPath().toString()).collect(Collectors.toSet()); + Set fs2LogPathSet = + fs2.getLogFiles().map(f -> f.getPath().toString()).collect(Collectors.toSet()); + Set commonLogPathSet = new HashSet<>(fs1LogPathSet); + commonLogPathSet.retainAll(fs2LogPathSet); + // Only keep log file paths that differ + fs1LogPathSet.removeAll(commonLogPathSet); + fs2LogPathSet.removeAll(commonLogPathSet); + // Check if the remaining log files are uncommitted. If there is any log file + // that is committed, the committed log files of two file slices are different + FileSystem fileSystem = metaClient.getFs(); + HoodieTimeline commitsTimeline = metaClient.getCommitsTimeline(); + if (hasCommittedLogFiles(fileSystem, fs1LogPathSet, commitsTimeline)) { + LOG.error("The first file slice has committed log files that cause mismatching: " + + fs1); + return false; + } + if (hasCommittedLogFiles(fileSystem, fs2LogPathSet, commitsTimeline)) { + LOG.error("The second file slice has committed log files that cause mismatching: " + + fs2); + return false; + } + return true; + } + + private boolean hasCommittedLogFiles( + FileSystem fs, Set logFilePathSet, HoodieTimeline commitsTimeline) { + if (logFilePathSet.isEmpty()) { + return false; + } + + AvroSchemaConverter converter = new AvroSchemaConverter(); + HoodieTimeline completedInstantsTimeline = commitsTimeline.filterCompletedInstants(); + HoodieTimeline inflightInstantsTimeline = commitsTimeline.filterInflights(); + + for (String logFilePathStr : logFilePathSet) { + HoodieLogFormat.Reader reader = null; + try { + Schema readerSchema = + converter.convert(Objects.requireNonNull( + TableSchemaResolver.readSchemaFromLogFile(fs, new Path(logFilePathStr)))); + reader = + HoodieLogFormat.newReader(fs, new HoodieLogFile(new Path(logFilePathStr)), readerSchema); + // read the 
avro blocks + if (reader.hasNext()) { + HoodieLogBlock block = reader.next(); + final String instantTime = block.getLogBlockHeader().get(INSTANT_TIME); + if (!completedInstantsTimeline.containsOrBeforeTimelineStarts(instantTime) + || inflightInstantsTimeline.containsInstant(instantTime)) { + // hit an uncommitted block possibly from a failed write + LOG.warn("Log file is uncommitted: " + logFilePathStr); + } else { + LOG.warn("Log file is committed: " + logFilePathStr); + return true; + } + } else { + LOG.warn("There is no log block in " + logFilePathStr); + } + } catch (IOException e) { + throw new HoodieValidationException("Validation failed due to IOException", e); + } finally { + FileIOUtils.closeQuietly(reader); + } + } + return false; + } + public class AsyncMetadataTableValidateService extends HoodieAsyncService { private final transient ExecutorService executor = Executors.newSingleThreadExecutor(); @@ -711,10 +893,10 @@ public int compare(HoodieFileGroup o1, HoodieFileGroup o2) { } public static class HoodieColumnRangeMetadataComparator - implements Comparator>, Serializable { + implements Comparator>, Serializable { @Override - public int compare(HoodieColumnRangeMetadata o1, HoodieColumnRangeMetadata o2) { + public int compare(HoodieColumnRangeMetadata o1, HoodieColumnRangeMetadata o2) { return o1.toString().compareTo(o2.toString()); } } @@ -744,7 +926,6 @@ public HoodieMetadataValidationContext( .enable(enableMetadataTable) .withMetadataIndexBloomFilter(enableMetadataTable) .withMetadataIndexColumnStats(enableMetadataTable) - .withMetadataIndexForAllColumns(enableMetadataTable) .withAssumeDatePartitioning(cfg.assumeDatePartitioning) .build(); this.fileSystemView = FileSystemViewManager.createInMemoryFileSystemView(engineContext, @@ -756,6 +937,10 @@ public HoodieMetadataValidationContext( } } + public HoodieTableMetaClient getMetaClient() { + return metaClient; + } + public List getSortedLatestBaseFileList(String partitionPath) { return 
fileSystemView.getLatestBaseFiles(partitionPath) .sorted(new HoodieBaseFileComparator()).collect(Collectors.toList()); @@ -771,7 +956,8 @@ public List getSortedAllFileGroupList(String partitionPath) { .sorted(new HoodieFileGroupComparator()).collect(Collectors.toList()); } - public List> getSortedColumnStatsList( + @SuppressWarnings({"rawtypes", "unchecked"}) + public List> getSortedColumnStatsList( String partitionPath, List baseFileNameList) { LOG.info("All column names for getting column stats: " + allColumnNameList); if (enableMetadataTable) { @@ -780,15 +966,7 @@ public List> getSortedColumnStatsList( return allColumnNameList.stream() .flatMap(columnName -> tableMetadata.getColumnStats(partitionFileNameList, columnName).values().stream() - .map(stats -> new HoodieColumnRangeMetadata<>( - stats.getFileName(), - columnName, - stats.getMinValue(), - stats.getMaxValue(), - stats.getNullCount(), - stats.getValueCount(), - stats.getTotalSize(), - stats.getTotalUncompressedSize())) + .map(HoodieTableMetadataUtil::convertColumnStatsRecordToColumnRangeMetadata) .collect(Collectors.toList()) .stream()) .sorted(new HoodieColumnRangeMetadataComparator()) @@ -799,18 +977,6 @@ public List> getSortedColumnStatsList( metaClient.getHadoopConf(), new Path(new Path(metaClient.getBasePath(), partitionPath), filename), allColumnNameList).stream()) - .map(rangeMetadata -> new HoodieColumnRangeMetadata( - rangeMetadata.getFilePath(), - rangeMetadata.getColumnName(), - // Note: here we ignore the type in the validation, - // since column stats from metadata table store the min/max values as String - rangeMetadata.getMinValue().toString(), - rangeMetadata.getMaxValue().toString(), - rangeMetadata.getNullCount(), - rangeMetadata.getValueCount(), - rangeMetadata.getTotalSize(), - rangeMetadata.getTotalUncompressedSize() - )) .sorted(new HoodieColumnRangeMetadataComparator()) .collect(Collectors.toList()); } diff --git 
a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java index 7d725ed6af37a..14b637d5e51b2 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java @@ -65,7 +65,6 @@ * --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ * --conf spark.sql.catalogImplementation=hive \ * --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension \ - * --packages org.apache.spark:spark-avro_2.12:3.1.2 \ * $HUDI_DIR/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_2.12-0.11.0-SNAPSHOT.jar \ * --mode dry_run \ * --base-path base_path \ @@ -89,7 +88,6 @@ * --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ * --conf spark.sql.catalogImplementation=hive \ * --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension \ - * --packages org.apache.spark:spark-avro_2.12:3.1.2 \ * $HUDI_DIR/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_2.12-0.11.0-SNAPSHOT.jar \ * --mode repair \ * --base-path base_path \ @@ -112,7 +110,6 @@ * --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ * --conf spark.sql.catalogImplementation=hive \ * --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension \ - * --packages org.apache.spark:spark-avro_2.12:3.1.2 \ * $HUDI_DIR/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_2.12-0.11.0-SNAPSHOT.jar \ * --mode dry_run \ * --base-path base_path \ @@ -133,7 +130,6 @@ * --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ * --conf spark.sql.catalogImplementation=hive \ * --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension \ - * --packages org.apache.spark:spark-avro_2.12:3.1.2 \ * 
$HUDI_DIR/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_2.12-0.11.0-SNAPSHOT.jar \ * --mode undo \ * --base-path base_path \ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java index 43e58d531ed0f..a2717a35617f3 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java @@ -117,8 +117,8 @@ public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDi dataFiles.forEach(hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath()))); // also need to copy over partition metadata - Path partitionMetaFile = - new Path(FSUtils.getPartitionPath(baseDir, partition), HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE); + Path partitionMetaFile = HoodiePartitionMetadata.getPartitionMetafilePath(fs1, + FSUtils.getPartitionPath(baseDir, partition)).get(); if (fs1.exists(partitionMetaFile)) { filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString())); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java index c2cfa390d08e8..255393b232eb1 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java @@ -206,9 +206,9 @@ private void exportAsHudi(JavaSparkContext jsc, Config cfg, List partiti Stream dataFiles = fsView.getLatestBaseFilesBeforeOrOn(partition, latestCommitTimestamp); dataFiles.forEach(hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath()))); // also need to copy over partition metadata - Path partitionMetaFile = - new Path(FSUtils.getPartitionPath(cfg.sourceBasePath, partition), 
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE); FileSystem fs = FSUtils.getFs(cfg.sourceBasePath, serConf.newCopy()); + Path partitionMetaFile = HoodiePartitionMetadata.getPartitionMetafilePath(fs, + FSUtils.getPartitionPath(cfg.sourceBasePath, partition)).get(); if (fs.exists(partitionMetaFile)) { filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString())); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java index 4dc0604ddbf21..5d1fd19267911 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java @@ -43,8 +43,8 @@ import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.utilities.checkpointing.InitialCheckPointProvider; import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics; -import org.apache.hudi.utilities.exception.HoodieSourcePostProcessException; import org.apache.hudi.utilities.exception.HoodieSchemaPostProcessException; +import org.apache.hudi.utilities.exception.HoodieSourcePostProcessException; import org.apache.hudi.utilities.schema.ChainedSchemaPostProcessor; import org.apache.hudi.utilities.schema.DelegatingSchemaProvider; import org.apache.hudi.utilities.schema.RowBasedSchemaProvider; @@ -104,21 +104,26 @@ * Bunch of helper methods. 
*/ public class UtilHelpers { + + public static final String EXECUTE = "execute"; + public static final String SCHEDULE = "schedule"; + public static final String SCHEDULE_AND_EXECUTE = "scheduleandexecute"; + private static final Logger LOG = LogManager.getLogger(UtilHelpers.class); public static Source createSource(String sourceClass, TypedProperties cfg, JavaSparkContext jssc, - SparkSession sparkSession, SchemaProvider schemaProvider, - HoodieDeltaStreamerMetrics metrics) throws IOException { + SparkSession sparkSession, SchemaProvider schemaProvider, + HoodieDeltaStreamerMetrics metrics) throws IOException { try { try { return (Source) ReflectionUtils.loadClass(sourceClass, - new Class[]{TypedProperties.class, JavaSparkContext.class, + new Class[] {TypedProperties.class, JavaSparkContext.class, SparkSession.class, SchemaProvider.class, HoodieDeltaStreamerMetrics.class}, cfg, jssc, sparkSession, schemaProvider, metrics); } catch (HoodieException e) { return (Source) ReflectionUtils.loadClass(sourceClass, - new Class[]{TypedProperties.class, JavaSparkContext.class, + new Class[] {TypedProperties.class, JavaSparkContext.class, SparkSession.class, SchemaProvider.class}, cfg, jssc, sparkSession, schemaProvider); } @@ -238,7 +243,7 @@ public static void validateAndAddProperties(String[] configs, SparkLauncher spar /** * Parse Schema from file. 
* - * @param fs File System + * @param fs File System * @param schemaFile Schema File */ public static String parseSchema(FileSystem fs, String schemaFile) throws Exception { @@ -267,6 +272,7 @@ private static SparkConf buildSparkConf(String appName, String defaultMaster, Ma sparkConf.set("spark.eventLog.overwrite", "true"); sparkConf.set("spark.eventLog.enabled", "true"); } + sparkConf.set("spark.ui.port", "8090"); sparkConf.setIfMissing("spark.driver.maxResultSize", "2g"); sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); sparkConf.set("spark.hadoop.mapred.output.compress", "true"); @@ -300,13 +306,13 @@ public static JavaSparkContext buildSparkContext(String appName, String sparkMas /** * Build Hoodie write client. * - * @param jsc Java Spark Context - * @param basePath Base Path - * @param schemaStr Schema + * @param jsc Java Spark Context + * @param basePath Base Path + * @param schemaStr Schema * @param parallelism Parallelism */ public static SparkRDDWriteClient createHoodieClient(JavaSparkContext jsc, String basePath, String schemaStr, - int parallelism, Option compactionStrategyClass, TypedProperties properties) { + int parallelism, Option compactionStrategyClass, TypedProperties properties) { HoodieCompactionConfig compactionConfig = compactionStrategyClass .map(strategy -> HoodieCompactionConfig.newBuilder().withInlineCompaction(false) .withCompactionStrategy(ReflectionUtils.loadClass(strategy)).build()) @@ -466,8 +472,7 @@ public static SchemaProviderWithPostProcessor wrapSchemaProviderWithPostProcesso Option.ofNullable(createSchemaPostProcessor(schemaPostProcessorClass, cfg, jssc))); } - public static SchemaProvider createRowBasedSchemaProvider(StructType structType, - TypedProperties cfg, JavaSparkContext jssc) { + public static SchemaProvider createRowBasedSchemaProvider(StructType structType, TypedProperties cfg, JavaSparkContext jssc) { SchemaProvider rowSchemaProvider = new RowBasedSchemaProvider(structType); return 
wrapSchemaProviderWithPostProcessor(rowSchemaProvider, cfg, jssc, null); } @@ -476,13 +481,13 @@ public static SchemaProvider createRowBasedSchemaProvider(StructType structType, * Create latest schema provider for Target schema. * * @param structType spark data type of incoming batch. - * @param jssc instance of {@link JavaSparkContext}. - * @param fs instance of {@link FileSystem}. - * @param basePath base path of the table. + * @param jssc instance of {@link JavaSparkContext}. + * @param fs instance of {@link FileSystem}. + * @param basePath base path of the table. * @return the schema provider where target schema refers to latest schema(either incoming schema or table schema). */ public static SchemaProvider createLatestSchemaProvider(StructType structType, - JavaSparkContext jssc, FileSystem fs, String basePath) { + JavaSparkContext jssc, FileSystem fs, String basePath) { SchemaProvider rowSchemaProvider = new RowBasedSchemaProvider(structType); Schema writeSchema = rowSchemaProvider.getTargetSchema(); Schema latestTableSchema = writeSchema; @@ -540,4 +545,12 @@ public static int retry(int maxRetryCount, CheckedSupplier supplier, St return ret; } + public static String getSchemaFromLatestInstant(HoodieTableMetaClient metaClient) throws Exception { + TableSchemaResolver schemaResolver = new TableSchemaResolver(metaClient); + if (metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants() == 0) { + throw new HoodieException("Cannot run clustering without any completed commits"); + } + Schema schema = schemaResolver.getTableAvroSchema(false); + return schema.toString(); + } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java index 84b7933767610..7e605dbd36a0a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java +++ 
b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java @@ -185,6 +185,7 @@ private void initializeTable() throws IOException { } } HoodieTableMetaClient.withPropertyBuilder() + .fromProperties(props) .setTableType(cfg.tableType) .setTableName(cfg.targetTableName) .setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue()) diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java index 50338e5510c5b..0e57bd379acdb 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java @@ -278,6 +278,8 @@ public void refreshTimeline() throws IOException { .setKeyGeneratorClassProp(props.getProperty(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(), SimpleKeyGenerator.class.getName())) .setPreCombineField(cfg.sourceOrderingField) + .setPartitionMetafileUseBaseFormat(props.getBoolean(HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.key(), + HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.defaultValue())) .initTable(new Configuration(jssc.hadoopConfiguration()), cfg.targetBasePath); } @@ -371,6 +373,8 @@ public Pair>> readFromSource( HoodieTableConfig.POPULATE_META_FIELDS.defaultValue())) .setKeyGeneratorClassProp(props.getProperty(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(), SimpleKeyGenerator.class.getName())) + .setPartitionMetafileUseBaseFormat(props.getBoolean(HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.key(), + HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.defaultValue())) .initTable(new Configuration(jssc.hadoopConfiguration()), cfg.targetBasePath); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java 
index c0c141db11de3..56124b82afc06 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java @@ -35,6 +35,7 @@ import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieInstant.State; @@ -127,7 +128,6 @@ public HoodieDeltaStreamer(Config cfg, JavaSparkContext jssc, FileSystem fs, Con public HoodieDeltaStreamer(Config cfg, JavaSparkContext jssc, FileSystem fs, Configuration conf, Option propsOverride) throws IOException { this.properties = combineProperties(cfg, propsOverride, jssc.hadoopConfiguration()); - if (cfg.initialCheckpointProvider != null && cfg.checkpoint == null) { InitialCheckPointProvider checkPointProvider = UtilHelpers.createInitialCheckpointProvider(cfg.initialCheckpointProvider, this.properties); @@ -156,7 +156,14 @@ private static TypedProperties combineProperties(Config cfg, Option getSparkSchedulingConfigs(HoodieDeltaStreamer. 
if (sparkSchedulerMode.isPresent() && SPARK_SCHEDULER_FAIR_MODE.equals(sparkSchedulerMode.get()) && cfg.continuousMode && cfg.tableType.equals(HoodieTableType.MERGE_ON_READ.name())) { String sparkSchedulingConfFile = generateAndStoreConfig(cfg.deltaSyncSchedulingWeight, - cfg.compactSchedulingWeight, cfg.deltaSyncSchedulingMinShare, cfg.compactSchedulingMinShare); + cfg.compactSchedulingWeight, cfg.deltaSyncSchedulingMinShare, cfg.compactSchedulingMinShare, + cfg.clusterSchedulingWeight, cfg.clusterSchedulingMinShare); + LOG.warn("Spark scheduling config file " + sparkSchedulingConfFile); additionalSparkConfigs.put(SparkConfigs.SPARK_SCHEDULER_ALLOCATION_FILE_KEY(), sparkSchedulingConfFile); } else { LOG.warn("Job Scheduling Configs will not be in effect as spark.scheduler.mode " @@ -100,14 +107,16 @@ public static Map getSparkSchedulingConfigs(HoodieDeltaStreamer. * @param compactionWeight Scheduling weight for compaction * @param deltaSyncMinShare Minshare for delta sync * @param compactionMinShare Minshare for compaction + * @param clusteringMinShare Scheduling weight for clustering + * @param clusteringWeight Minshare for clustering * @return Return the absolute path of the tmp file which stores the spark schedule configs * @throws IOException Throws an IOException when write configs to file failed */ private static String generateAndStoreConfig(Integer deltaSyncWeight, Integer compactionWeight, - Integer deltaSyncMinShare, Integer compactionMinShare) throws IOException { + Integer deltaSyncMinShare, Integer compactionMinShare, Integer clusteringWeight, Integer clusteringMinShare) throws IOException { File tempConfigFile = File.createTempFile(UUID.randomUUID().toString(), ".xml"); BufferedWriter bw = new BufferedWriter(new FileWriter(tempConfigFile)); - bw.write(generateConfig(deltaSyncWeight, compactionWeight, deltaSyncMinShare, compactionMinShare)); + bw.write(generateConfig(deltaSyncWeight, compactionWeight, deltaSyncMinShare, compactionMinShare, 
clusteringWeight, clusteringMinShare)); bw.close(); LOG.info("Configs written to file" + tempConfigFile.getAbsolutePath()); return tempConfigFile.getAbsolutePath(); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java index 2f7d9898b95b0..483e44830c7c1 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java @@ -26,14 +26,18 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper; +import com.esotericsoftware.minlog.Log; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.DataFrameReader; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; @@ -42,6 +46,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; import static org.apache.hudi.utilities.sources.HoodieIncrSource.Config.DEFAULT_NUM_INSTANTS_PER_FETCH; import static org.apache.hudi.utilities.sources.HoodieIncrSource.Config.DEFAULT_READ_LATEST_INSTANT_ON_MISSING_CKPT; @@ -50,7 +55,6 @@ import static org.apache.hudi.utilities.sources.HoodieIncrSource.Config.NUM_INSTANTS_PER_FETCH; import static org.apache.hudi.utilities.sources.HoodieIncrSource.Config.READ_LATEST_INSTANT_ON_MISSING_CKPT; import static 
org.apache.hudi.utilities.sources.HoodieIncrSource.Config.SOURCE_FILE_FORMAT; - /** * This source will use the S3 events meta information from hoodie table generate by {@link S3EventsSource}. */ @@ -71,6 +75,12 @@ static class Config { static final String S3_IGNORE_KEY_PREFIX = "hoodie.deltastreamer.source.s3incr.ignore.key.prefix"; // control whether to ignore the s3 objects with this substring static final String S3_IGNORE_KEY_SUBSTRING = "hoodie.deltastreamer.source.s3incr.ignore.key.substring"; + /** + *{@value #SPARK_DATASOURCE_OPTIONS} is json string, passed to the reader while loading dataset. + * Example delta streamer conf + * - --hoodie-conf hoodie.deltastreamer.source.s3incr.spark.datasource.options={"header":"true","encoding":"UTF-8"} + */ + static final String SPARK_DATASOURCE_OPTIONS = "hoodie.deltastreamer.source.s3incr.spark.datasource.options"; } public S3EventsHoodieIncrSource( @@ -81,6 +91,22 @@ public S3EventsHoodieIncrSource( super(props, sparkContext, sparkSession, schemaProvider); } + private DataFrameReader getDataFrameReader(String fileFormat) { + DataFrameReader dataFrameReader = sparkSession.read().format(fileFormat); + if (!StringUtils.isNullOrEmpty(props.getString(Config.SPARK_DATASOURCE_OPTIONS, null))) { + final ObjectMapper mapper = new ObjectMapper(); + Map sparkOptionsMap = null; + try { + sparkOptionsMap = mapper.readValue(props.getString(Config.SPARK_DATASOURCE_OPTIONS), Map.class); + } catch (IOException e) { + throw new HoodieException(String.format("Failed to parse sparkOptions: %s", props.getString(Config.SPARK_DATASOURCE_OPTIONS)), e); + } + Log.info(String.format("sparkOptions loaded: %s", sparkOptionsMap)); + dataFrameReader = dataFrameReader.options(sparkOptionsMap); + } + return dataFrameReader; + } + @Override public Pair>, String> fetchNextBatch(Option lastCkptStr, long sourceLimit) { DataSourceUtils.checkRequiredProperties(props, Collections.singletonList(HOODIE_SRC_BASE_PATH)); @@ -125,7 +151,7 @@ public Pair>, 
String> fetchNextBatch(Option lastCkpt .filter(String.format("%s > '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, queryTypeAndInstantEndpts.getRight().getLeft())); } - + if (source.isEmpty()) { return Pair.of(Option.empty(), queryTypeAndInstantEndpts.getRight().getRight()); } @@ -141,7 +167,7 @@ public Pair>, String> fetchNextBatch(Option lastCkpt filter = filter + " and s3.object.key not like '%" + props.getString(Config.S3_IGNORE_KEY_SUBSTRING) + "%'"; } // add file format filtering by default - filter = filter + " and s3.object.key like '%" + fileFormat + "%'"; + filter = filter + " and s3.object.key like '%" + fileFormat + "%'"; String s3FS = props.getString(Config.S3_FS_PREFIX, "s3").toLowerCase(); String s3Prefix = s3FS + "://"; @@ -174,7 +200,8 @@ public Pair>, String> fetchNextBatch(Option lastCkpt } Option> dataset = Option.empty(); if (!cloudFiles.isEmpty()) { - dataset = Option.of(sparkSession.read().format(fileFormat).load(cloudFiles.toArray(new String[0]))); + DataFrameReader dataFrameReader = getDataFrameReader(fileFormat); + dataset = Option.of(dataFrameReader.load(cloudFiles.toArray(new String[0]))); } return Pair.of(dataset, queryTypeAndInstantEndpts.getRight().getRight()); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/processor/maxwell/MaxwellJsonKafkaSourcePostProcessor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/processor/maxwell/MaxwellJsonKafkaSourcePostProcessor.java index 9ca91893bec69..42e2556ea59fa 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/processor/maxwell/MaxwellJsonKafkaSourcePostProcessor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/processor/maxwell/MaxwellJsonKafkaSourcePostProcessor.java @@ -91,9 +91,9 @@ public static class Config { public static final ConfigProperty PRECOMBINE_FIELD_TYPE_PROP = ConfigProperty .key("hoodie.deltastreamer.source.json.kafka.post.processor.maxwell.precombine.field.type") - 
.defaultValue("DATA_STRING") + .defaultValue(DATE_STRING.toString()) .withDocumentation("Data type of the preCombine field. could be NON_TIMESTAMP, DATE_STRING," - + "UNIX_TIMESTAMP or EPOCHMILLISECONDS. DATA_STRING by default "); + + "UNIX_TIMESTAMP or EPOCHMILLISECONDS. DATE_STRING by default "); public static final ConfigProperty PRECOMBINE_FIELD_FORMAT_PROP = ConfigProperty .key("hoodie.deltastreamer.source.json.kafka.post.processor.maxwell.precombine.field.format") diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHiveIncrementalPuller.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHiveIncrementalPuller.java index f6ea5c0f55485..d6837a384aa0d 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHiveIncrementalPuller.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHiveIncrementalPuller.java @@ -167,7 +167,7 @@ public void testPuller() throws IOException, URISyntaxException { puller.saveDelta(); HoodieHiveClient assertingClient = new HoodieHiveClient(new HiveSyncConfig(getAssertionSyncConfig(cfg.tmpDb)), HiveTestUtil.getHiveConf(), fileSystem); String tmpTable = cfg.targetTable + "__" + cfg.sourceTable; - assertTrue(assertingClient.doesTableExist(tmpTable)); + assertTrue(assertingClient.tableExists(tmpTable)); } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java new file mode 100644 index 0000000000000..9ce8eef313de3 --- /dev/null +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.utilities; + +import org.apache.hudi.avro.model.HoodieIndexCommitMetadata; +import org.apache.hudi.avro.model.HoodieIndexPartitionInfo; +import org.apache.hudi.client.HoodieReadClient; +import org.apache.hudi.client.SparkRDDWriteClient; +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.metadata.MetadataPartitionType; +import org.apache.hudi.testutils.providers.SparkProvider; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestHoodieIndexer extends HoodieCommonTestHarness implements SparkProvider { + + private static transient SparkSession spark; + private static transient SQLContext sqlContext; + private static transient JavaSparkContext jsc; + private static transient HoodieSparkEngineContext context; + + @BeforeEach + 
public void init() throws IOException { + boolean initialized = spark != null; + if (!initialized) { + SparkConf sparkConf = conf(); + SparkRDDWriteClient.registerClasses(sparkConf); + HoodieReadClient.addHoodieSupport(sparkConf); + spark = SparkSession.builder().config(sparkConf).getOrCreate(); + sqlContext = spark.sqlContext(); + jsc = new JavaSparkContext(spark.sparkContext()); + context = new HoodieSparkEngineContext(jsc); + } + initPath(); + metaClient = HoodieTestUtils.init(basePath, getTableType()); + } + + @Test + public void testGetRequestedPartitionTypes() { + HoodieIndexer.Config config = new HoodieIndexer.Config(); + config.basePath = basePath; + config.tableName = "indexer_test"; + config.indexTypes = "FILES,BLOOM_FILTERS,COLUMN_STATS"; + HoodieIndexer indexer = new HoodieIndexer(jsc, config); + List partitionTypes = indexer.getRequestedPartitionTypes(config.indexTypes); + assertFalse(partitionTypes.contains(MetadataPartitionType.FILES)); + assertTrue(partitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)); + assertTrue(partitionTypes.contains(MetadataPartitionType.COLUMN_STATS)); + } + + @Test + public void testIsIndexBuiltForAllRequestedTypes() { + HoodieIndexer.Config config = new HoodieIndexer.Config(); + config.basePath = basePath; + config.tableName = "indexer_test"; + config.indexTypes = "BLOOM_FILTERS,COLUMN_STATS"; + HoodieIndexer indexer = new HoodieIndexer(jsc, config); + HoodieIndexCommitMetadata commitMetadata = HoodieIndexCommitMetadata.newBuilder() + .setIndexPartitionInfos(Arrays.asList(new HoodieIndexPartitionInfo( + 1, + MetadataPartitionType.COLUMN_STATS.getPartitionPath(), + "0000"))) + .build(); + assertFalse(indexer.isIndexBuiltForAllRequestedTypes(commitMetadata.getIndexPartitionInfos())); + + config.indexTypes = "COLUMN_STATS"; + indexer = new HoodieIndexer(jsc, config); + assertTrue(indexer.isIndexBuiltForAllRequestedTypes(commitMetadata.getIndexPartitionInfos())); + } + + @Override + public HoodieEngineContext context() 
{ + return context; + } + + @Override + public SparkSession spark() { + return spark; + } + + @Override + public SQLContext sqlContext() { + return sqlContext; + } + + @Override + public JavaSparkContext jsc() { + return jsc; + } +} diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestUtilHelpers.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestUtilHelpers.java deleted file mode 100644 index 45ffa1f2b459d..0000000000000 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestUtilHelpers.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hudi.utilities; - -import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.utilities.transform.ChainedTransformer; -import org.apache.hudi.utilities.transform.Transformer; - -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class TestUtilHelpers { - - public static class TransformerFoo implements Transformer { - - @Override - public Dataset apply(JavaSparkContext jsc, SparkSession sparkSession, Dataset rowDataset, TypedProperties properties) { - return null; - } - } - - public static class TransformerBar implements Transformer { - - @Override - public Dataset apply(JavaSparkContext jsc, SparkSession sparkSession, Dataset rowDataset, TypedProperties properties) { - return null; - } - } - - @Nested - public class TestCreateTransformer { - - @Test - public void testCreateTransformerNotPresent() throws IOException { - assertFalse(UtilHelpers.createTransformer(null).isPresent()); - } - - @Test - public void testCreateTransformerLoadOneClass() throws IOException { - Transformer transformer = UtilHelpers.createTransformer(Collections.singletonList(TransformerFoo.class.getName())).get(); - assertTrue(transformer instanceof ChainedTransformer); - List transformerNames = ((ChainedTransformer) transformer).getTransformersNames(); - assertEquals(1, transformerNames.size()); - assertEquals(TransformerFoo.class.getName(), transformerNames.get(0)); - } - - @Test - public void 
testCreateTransformerLoadMultipleClasses() throws IOException { - List classNames = Arrays.asList(TransformerFoo.class.getName(), TransformerBar.class.getName()); - Transformer transformer = UtilHelpers.createTransformer(classNames).get(); - assertTrue(transformer instanceof ChainedTransformer); - List transformerNames = ((ChainedTransformer) transformer).getTransformersNames(); - assertEquals(2, transformerNames.size()); - assertEquals(TransformerFoo.class.getName(), transformerNames.get(0)); - assertEquals(TransformerBar.class.getName(), transformerNames.get(1)); - } - - @Test - public void testCreateTransformerThrowsException() throws IOException { - Exception e = assertThrows(IOException.class, () -> { - UtilHelpers.createTransformer(Arrays.asList("foo", "bar")); - }); - assertEquals("Could not load transformer class(es) [foo, bar]", e.getMessage()); - } - } -} diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java index a4d91f2a50ade..a9de85ce5ac9e 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java @@ -129,6 +129,7 @@ protected static void prepareInitialConfigs(FileSystem dfs, String dfsBasePath, UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/uber_config.properties", dfs, dfsBasePath + "/config/uber_config.properties"); UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/short_trip_uber_config.properties", dfs, dfsBasePath + "/config/short_trip_uber_config.properties"); UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/clusteringjob.properties", dfs, dfsBasePath + "/clusteringjob.properties"); + UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/indexer.properties", dfs, dfsBasePath + 
"/indexer.properties"); writeCommonPropsToFile(dfs, dfsBasePath); @@ -137,7 +138,7 @@ protected static void prepareInitialConfigs(FileSystem dfs, String dfsBasePath, TypedProperties downstreamProps = new TypedProperties(); downstreamProps.setProperty("include", "base.properties"); downstreamProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); - downstreamProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + downstreamProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); // Source schema is the target schema of upstream table downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/target.avsc"); @@ -149,7 +150,7 @@ protected static void prepareInitialConfigs(FileSystem dfs, String dfsBasePath, invalidProps.setProperty("include", "sql-transformer.properties"); invalidProps.setProperty("hoodie.datasource.write.keygenerator.class", "invalid"); invalidProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); - invalidProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + invalidProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); invalidProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc"); invalidProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); UtilitiesTestBase.Helpers.savePropsToDFS(invalidProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_INVALID); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java index aa233d4e37d3e..2db72cbd4102e 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java +++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java @@ -43,6 +43,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieClusteringConfig; @@ -56,6 +57,7 @@ import org.apache.hudi.keygen.SimpleKeyGenerator; import org.apache.hudi.utilities.DummySchemaProvider; import org.apache.hudi.utilities.HoodieClusteringJob; +import org.apache.hudi.utilities.HoodieIndexer; import org.apache.hudi.utilities.deltastreamer.DeltaSync; import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer; import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; @@ -103,6 +105,8 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -126,6 +130,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.utilities.UtilHelpers.EXECUTE; +import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE; +import static org.apache.hudi.utilities.UtilHelpers.SCHEDULE_AND_EXECUTE; import static org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.CHECKPOINT_KEY; import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; @@ -141,7 +148,7 @@ /** * Basic tests against {@link HoodieDeltaStreamer}, by issuing bulk_inserts, upserts, inserts. Check counts at the end. 
*/ - +@Tag("functional") public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase { private static final Logger LOG = LogManager.getLogger(TestHoodieDeltaStreamer.class); @@ -394,6 +401,22 @@ static void assertAtLeastNReplaceCommits(int minExpected, String tablePath, File assertTrue(minExpected <= numDeltaCommits, "Got=" + numDeltaCommits + ", exp >=" + minExpected); } + static void assertPendingIndexCommit(String tablePath, FileSystem fs) { + HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(tablePath).setLoadActiveTimelineOnLoad(true).build(); + HoodieTimeline timeline = meta.getActiveTimeline().getAllCommitsTimeline().filterPendingIndexTimeline(); + LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants().collect(Collectors.toList())); + int numIndexCommits = (int) timeline.getInstants().count(); + assertEquals(1, numIndexCommits, "Got=" + numIndexCommits + ", exp=1"); + } + + static void assertCompletedIndexCommit(String tablePath, FileSystem fs) { + HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(tablePath).setLoadActiveTimelineOnLoad(true).build(); + HoodieTimeline timeline = meta.getActiveTimeline().getAllCommitsTimeline().filterCompletedIndexTimeline(); + LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants().collect(Collectors.toList())); + int numIndexCommits = (int) timeline.getInstants().count(); + assertEquals(1, numIndexCommits, "Got=" + numIndexCommits + ", exp=1"); + } + static void assertNoReplaceCommits(String tablePath, FileSystem fs) { HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(tablePath).setLoadActiveTimelineOnLoad(true).build(); HoodieTimeline timeline = meta.getActiveTimeline().getCompletedReplaceTimeline(); @@ -580,32 +603,32 @@ public void testBulkInsertsAndUpsertsWithBootstrap() throws Exception { // Initial bulk insert HoodieDeltaStreamer.Config cfg = 
TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT); new HoodieDeltaStreamer(cfg, jsc).sync(); - TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1); // No new data => no commits. cfg.sourceLimit = 0; new HoodieDeltaStreamer(cfg, jsc).sync(); - TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1); // upsert() #1 cfg.sourceLimit = 2000; cfg.operation = WriteOperationType.UPSERT; new HoodieDeltaStreamer(cfg, jsc).sync(); - TestHelpers.assertRecordCount(1950, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1950, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1950, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1950, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2); - List counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*.parquet", sqlContext); + List counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext); assertEquals(1950, counts.stream().mapToLong(entry -> entry.getLong(1)).sum()); // Perform bootstrap with tableBasePath as source String bootstrapSourcePath = dfsBasePath + "/src_bootstrapped"; Dataset sourceDf = sqlContext.read() .format("org.apache.hudi") - .load(tableBasePath + "/*/*.parquet"); + .load(tableBasePath); sourceDf.write().format("parquet").save(bootstrapSourcePath); String 
newDatasetBasePath = dfsBasePath + "/test_dataset_bootstrapped"; @@ -615,11 +638,11 @@ public void testBulkInsertsAndUpsertsWithBootstrap() throws Exception { cfg.configs.add("hoodie.bootstrap.parallelism=5"); cfg.targetBasePath = newDatasetBasePath; new HoodieDeltaStreamer(cfg, jsc).sync(); - Dataset res = sqlContext.read().format("org.apache.hudi").load(newDatasetBasePath + "/*.parquet"); + Dataset res = sqlContext.read().format("org.apache.hudi").load(newDatasetBasePath); LOG.info("Schema :"); res.printSchema(); - TestHelpers.assertRecordCount(1950, newDatasetBasePath + "/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1950, newDatasetBasePath, sqlContext); res.registerTempTable("bootstrapped"); assertEquals(1950, sqlContext.sql("select distinct _hoodie_record_key from bootstrapped").count()); @@ -646,7 +669,7 @@ public void testSchemaEvolution(String tableType, boolean useUserProvidedSchema, cfg.configs.add(SparkAvroPostProcessor.Config.SPARK_AVRO_POST_PROCESSOR_PROP_ENABLE + "=false"); } new HoodieDeltaStreamer(cfg, jsc).sync(); - TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*", sqlContext); + TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1); // Upsert data produced with Schema B, pass Schema B @@ -660,12 +683,12 @@ public void testSchemaEvolution(String tableType, boolean useUserProvidedSchema, } new HoodieDeltaStreamer(cfg, jsc).sync(); // out of 1000 new records, 500 are inserts, 450 are updates and 50 are deletes. 
- TestHelpers.assertRecordCount(1450, tableBasePath + "/*/*", sqlContext); + TestHelpers.assertRecordCount(1450, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2); - List counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*", sqlContext); + List counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext); assertEquals(1450, counts.stream().mapToLong(entry -> entry.getLong(1)).sum()); - sqlContext.read().format("org.apache.hudi").load(tableBasePath + "/*/*").createOrReplaceTempView("tmp_trips"); + sqlContext.read().format("org.apache.hudi").load(tableBasePath).createOrReplaceTempView("tmp_trips"); long recordCount = sqlContext.sparkSession().sql("select * from tmp_trips where evoluted_optional_union_field is not NULL").count(); assertEquals(950, recordCount); @@ -686,9 +709,9 @@ public void testSchemaEvolution(String tableType, boolean useUserProvidedSchema, cfg.configs.add(DataSourceWriteOptions.RECONCILE_SCHEMA().key() + "=true"); new HoodieDeltaStreamer(cfg, jsc).sync(); // again, 1000 new records, 500 are inserts, 450 are updates and 50 are deletes. 
- TestHelpers.assertRecordCount(1900, tableBasePath + "/*/*", sqlContext); + TestHelpers.assertRecordCount(1900, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00002", tableBasePath, dfs, 3); - counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*", sqlContext); + counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext); assertEquals(1900, counts.stream().mapToLong(entry -> entry.getLong(1)).sum()); TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(HoodieTableMetaClient.builder().setBasePath(tableBasePath).setConf(dfs.getConf()).build()); @@ -736,8 +759,8 @@ private void testUpsertsContinuousMode(HoodieTableType tableType, String tempDir } else { TestHelpers.assertAtleastNCompactionCommits(5, tableBasePath, dfs); } - TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(totalRecords, tableBasePath, sqlContext); return true; }); } @@ -958,6 +981,54 @@ private HoodieClusteringJob.Config buildHoodieClusteringUtilConfig(String basePa return config; } + private HoodieIndexer.Config buildIndexerConfig(String basePath, + String tableName, + String indexInstantTime, + String runningMode, + String indexTypes) { + HoodieIndexer.Config config = new HoodieIndexer.Config(); + config.basePath = basePath; + config.tableName = tableName; + config.indexInstantTime = indexInstantTime; + config.propsFilePath = dfsBasePath + "/indexer.properties"; + config.runningMode = runningMode; + config.indexTypes = indexTypes; + return config; + } + + @Test + public void testHoodieIndexer() throws Exception { + String tableBasePath = dfsBasePath + "/asyncindexer"; + HoodieDeltaStreamer ds = initialHoodieDeltaStreamer(tableBasePath, 1000, "false"); + + deltaStreamerTestRunner(ds, (r) -> { + 
TestHelpers.assertAtLeastNCommits(2, tableBasePath, dfs); + + Option scheduleIndexInstantTime = Option.empty(); + try { + HoodieIndexer scheduleIndexingJob = new HoodieIndexer(jsc, + buildIndexerConfig(tableBasePath, ds.getConfig().targetTableName, null, SCHEDULE, "COLUMN_STATS")); + scheduleIndexInstantTime = scheduleIndexingJob.doSchedule(); + } catch (Exception e) { + LOG.info("Schedule indexing failed", e); + return false; + } + if (scheduleIndexInstantTime.isPresent()) { + TestHelpers.assertPendingIndexCommit(tableBasePath, dfs); + LOG.info("Schedule indexing success, now build index with instant time " + scheduleIndexInstantTime.get()); + HoodieIndexer runIndexingJob = new HoodieIndexer(jsc, + buildIndexerConfig(tableBasePath, ds.getConfig().targetTableName, scheduleIndexInstantTime.get(), EXECUTE, "COLUMN_STATS")); + runIndexingJob.start(0); + LOG.info("Metadata indexing success"); + TestHelpers.assertCompletedIndexCommit(tableBasePath, dfs); + } else { + LOG.warn("Metadata indexing failed"); + } + return true; + }); + } + + @Disabled("HUDI-3710 to fix the ConcurrentModificationException") @ParameterizedTest @ValueSource(booleans = {true, false}) public void testHoodieAsyncClusteringJob(boolean shouldPassInClusteringInstantTime) throws Exception { @@ -1011,7 +1082,7 @@ public void testAsyncClusteringService() throws Exception { // There should be 4 commits, one of which should be a replace commit TestHelpers.assertAtLeastNCommits(4, tableBasePath, dfs); TestHelpers.assertAtLeastNReplaceCommits(1, tableBasePath, dfs); - TestHelpers.assertDistinctRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertDistinctRecordCount(totalRecords, tableBasePath, sqlContext); } /** @@ -1039,7 +1110,7 @@ public void testAsyncClusteringServiceWithConflicts() throws Exception { // There should be 4 commits, one of which should be a replace commit TestHelpers.assertAtLeastNCommits(4, tableBasePath, dfs); 
TestHelpers.assertAtLeastNReplaceCommits(1, tableBasePath, dfs); - TestHelpers.assertDistinctRecordCount(1900, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertDistinctRecordCount(1900, tableBasePath, sqlContext); } @Test @@ -1062,7 +1133,7 @@ public void testAsyncClusteringServiceWithCompaction() throws Exception { // There should be 4 commits, one of which should be a replace commit TestHelpers.assertAtLeastNCommits(4, tableBasePath, dfs); TestHelpers.assertAtLeastNReplaceCommits(1, tableBasePath, dfs); - TestHelpers.assertDistinctRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertDistinctRecordCount(totalRecords, tableBasePath, sqlContext); } @ParameterizedTest @@ -1127,28 +1198,28 @@ public void testHoodieAsyncClusteringJobWithScheduleAndExecute(String runningMod LOG.info("Cluster success"); } else { LOG.warn("Import failed"); - if (!runningMode.toLowerCase().equals(HoodieClusteringJob.EXECUTE)) { + if (!runningMode.toLowerCase().equals(EXECUTE)) { return false; } } } catch (Exception e) { LOG.warn("ScheduleAndExecute clustering failed", e); exception = e; - if (!runningMode.equalsIgnoreCase(HoodieClusteringJob.EXECUTE)) { + if (!runningMode.equalsIgnoreCase(EXECUTE)) { return false; } } switch (runningMode.toLowerCase()) { - case HoodieClusteringJob.SCHEDULE_AND_EXECUTE: { + case SCHEDULE_AND_EXECUTE: { TestHelpers.assertAtLeastNReplaceCommits(2, tableBasePath, dfs); return true; } - case HoodieClusteringJob.SCHEDULE: { + case SCHEDULE: { TestHelpers.assertAtLeastNReplaceRequests(2, tableBasePath, dfs); TestHelpers.assertNoReplaceCommits(tableBasePath, dfs); return true; } - case HoodieClusteringJob.EXECUTE: { + case EXECUTE: { TestHelpers.assertNoReplaceCommits(tableBasePath, dfs); return true; } @@ -1168,15 +1239,15 @@ public void testBulkInsertsAndUpsertsWithSQLBasedTransformerFor2StepPipeline() t String tableBasePath = dfsBasePath + "/test_table2"; String downstreamTableBasePath = dfsBasePath + 
"/test_downstream_table2"; - HiveSyncConfig hiveSyncConfig = getHiveSyncConfig(tableBasePath, "hive_trips"); - // Initial bulk insert to ingest to first hudi table HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT, Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true); + // NOTE: We should not have need to set below config, 'datestr' should have assumed date partitioning + cfg.configs.add("hoodie.datasource.hive_sync.partition_fields=year,month,day"); new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync(); - TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext); + TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath, sqlContext); String lastInstantForUpstreamTable = TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1); // Now incrementally pull from the above hudi table and ingest to second table @@ -1184,17 +1255,17 @@ public void testBulkInsertsAndUpsertsWithSQLBasedTransformerFor2StepPipeline() t TestHelpers.makeConfigForHudiIncrSrc(tableBasePath, downstreamTableBasePath, WriteOperationType.BULK_INSERT, true, null); new HoodieDeltaStreamer(downstreamCfg, jsc, dfs, hiveServer.getHiveConf()).sync(); - TestHelpers.assertRecordCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCountWithExactValue(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, downstreamTableBasePath, sqlContext); + 
TestHelpers.assertDistanceCount(1000, downstreamTableBasePath, sqlContext); + TestHelpers.assertDistanceCountWithExactValue(1000, downstreamTableBasePath, sqlContext); TestHelpers.assertCommitMetadata(lastInstantForUpstreamTable, downstreamTableBasePath, dfs, 1); // No new data => no commits for upstream table cfg.sourceLimit = 0; new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync(); - TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext); + TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1); // with no change in upstream table, no change in downstream too when pulled. 
@@ -1202,20 +1273,20 @@ public void testBulkInsertsAndUpsertsWithSQLBasedTransformerFor2StepPipeline() t TestHelpers.makeConfigForHudiIncrSrc(tableBasePath, downstreamTableBasePath, WriteOperationType.BULK_INSERT, true, DummySchemaProvider.class.getName()); new HoodieDeltaStreamer(downstreamCfg1, jsc).sync(); - TestHelpers.assertRecordCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCountWithExactValue(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, downstreamTableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1000, downstreamTableBasePath, sqlContext); + TestHelpers.assertDistanceCountWithExactValue(1000, downstreamTableBasePath, sqlContext); TestHelpers.assertCommitMetadata(lastInstantForUpstreamTable, downstreamTableBasePath, dfs, 1); // upsert() #1 on upstream hudi table cfg.sourceLimit = 2000; cfg.operation = WriteOperationType.UPSERT; new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync(); - TestHelpers.assertRecordCount(1950, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1950, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCountWithExactValue(1950, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1950, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1950, tableBasePath, sqlContext); + TestHelpers.assertDistanceCountWithExactValue(1950, tableBasePath, sqlContext); lastInstantForUpstreamTable = TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2); - List counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*.parquet", sqlContext); + List counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext); assertEquals(1950, counts.stream().mapToLong(entry -> entry.getLong(1)).sum()); // Incrementally pull changes in upstream hudi 
table and apply to downstream table @@ -1224,18 +1295,20 @@ public void testBulkInsertsAndUpsertsWithSQLBasedTransformerFor2StepPipeline() t false, null); downstreamCfg.sourceLimit = 2000; new HoodieDeltaStreamer(downstreamCfg, jsc).sync(); - TestHelpers.assertRecordCount(2000, downstreamTableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(2000, downstreamTableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCountWithExactValue(2000, downstreamTableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(2000, downstreamTableBasePath, sqlContext); + TestHelpers.assertDistanceCount(2000, downstreamTableBasePath, sqlContext); + TestHelpers.assertDistanceCountWithExactValue(2000, downstreamTableBasePath, sqlContext); String finalInstant = TestHelpers.assertCommitMetadata(lastInstantForUpstreamTable, downstreamTableBasePath, dfs, 2); - counts = TestHelpers.countsPerCommit(downstreamTableBasePath + "/*/*.parquet", sqlContext); + counts = TestHelpers.countsPerCommit(downstreamTableBasePath, sqlContext); assertEquals(2000, counts.stream().mapToLong(entry -> entry.getLong(1)).sum()); // Test Hive integration + HiveSyncConfig hiveSyncConfig = getHiveSyncConfig(tableBasePath, "hive_trips"); + hiveSyncConfig.partitionFields = CollectionUtils.createImmutableList("year", "month", "day"); HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, hiveServer.getHiveConf(), dfs); - assertTrue(hiveClient.doesTableExist(hiveSyncConfig.tableName), "Table " + hiveSyncConfig.tableName + " should exist"); - assertEquals(1, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size(), + assertTrue(hiveClient.tableExists(hiveSyncConfig.tableName), "Table " + hiveSyncConfig.tableName + " should exist"); + assertEquals(3, hiveClient.getAllPartitions(hiveSyncConfig.tableName).size(), "Table partitions should match the number of partitions we wrote"); assertEquals(lastInstantForUpstreamTable, 
hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get(), @@ -1259,14 +1332,14 @@ public void testNullSchemaProvider() throws Exception { public void testPayloadClassUpdate() throws Exception { String dataSetBasePath = dfsBasePath + "/test_dataset_mor"; HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(dataSetBasePath, WriteOperationType.BULK_INSERT, - Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true, + Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false, true, false, null, "MERGE_ON_READ"); new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync(); - TestHelpers.assertRecordCount(1000, dataSetBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, dataSetBasePath, sqlContext); //now create one more deltaStreamer instance and update payload class cfg = TestHelpers.makeConfig(dataSetBasePath, WriteOperationType.BULK_INSERT, - Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true, + Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false, true, true, DummyAvroPayload.class.getName(), "MERGE_ON_READ"); new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()); @@ -1285,14 +1358,14 @@ public void testPayloadClassUpdate() throws Exception { public void testPayloadClassUpdateWithCOWTable() throws Exception { String dataSetBasePath = dfsBasePath + "/test_dataset_cow"; HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(dataSetBasePath, WriteOperationType.BULK_INSERT, - Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true, + Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false, true, false, null, null); new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync(); - TestHelpers.assertRecordCount(1000, 
dataSetBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, dataSetBasePath, sqlContext); //now create one more deltaStreamer instance and update payload class cfg = TestHelpers.makeConfig(dataSetBasePath, WriteOperationType.BULK_INSERT, - Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true, + Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false, true, true, DummyAvroPayload.class.getName(), null); new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()); @@ -1314,7 +1387,7 @@ public void testFilterDupes() throws Exception { // Initial bulk insert HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT); new HoodieDeltaStreamer(cfg, jsc).sync(); - TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1); // Generate the same 1000 records + 1000 new ones for upsert @@ -1322,10 +1395,10 @@ public void testFilterDupes() throws Exception { cfg.sourceLimit = 2000; cfg.operation = WriteOperationType.INSERT; new HoodieDeltaStreamer(cfg, jsc).sync(); - TestHelpers.assertRecordCount(2000, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(2000, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2); // 1000 records for commit 00000 & 1000 for commit 00001 - List counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*.parquet", sqlContext); + List counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext); assertEquals(1000, counts.get(0).getLong(1)); assertEquals(1000, counts.get(1).getLong(1)); @@ -1374,7 +1447,7 @@ public void testDistributedTestDataSource() { assertEquals(1000, c); } - private static void prepareJsonKafkaDFSFiles(int numRecords, boolean createTopic, String 
topicName) throws IOException { + private static void prepareJsonKafkaDFSFiles(int numRecords, boolean createTopic, String topicName) { if (createTopic) { try { testUtils.createTopic(topicName, 2); @@ -1394,7 +1467,7 @@ private void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTrans private void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTransformer, String sourceSchemaFile, String targetSchemaFile, String propsFileName, String parquetSourceRoot, boolean addCommonProps) throws IOException { prepareParquetDFSSource(useSchemaProvider, hasTransformer, sourceSchemaFile, targetSchemaFile, propsFileName, parquetSourceRoot, addCommonProps, - "not_there"); + "partition_path"); } private void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTransformer, String sourceSchemaFile, String targetSchemaFile, @@ -1434,7 +1507,7 @@ private void testParquetDFSSource(boolean useSchemaProvider, List transf transformerClassNames, PROPS_FILENAME_TEST_PARQUET, false, useSchemaProvider, 100000, false, null, null, "timestamp", null), jsc); deltaStreamer.sync(); - TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath, sqlContext); testNum++; if (testEmptyBatch) { @@ -1443,7 +1516,7 @@ private void testParquetDFSSource(boolean useSchemaProvider, List transf TestParquetDFSSourceEmptyBatch.returnEmptyBatch = true; deltaStreamer.sync(); // since we mimic'ed empty batch, total records should be same as first sync(). - TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath, sqlContext); HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setBasePath(tableBasePath).setConf(jsc.hadoopConfiguration()).build(); // validate table schema fetches valid schema from last but one commit. 
@@ -1460,7 +1533,7 @@ private void testORCDFSSource(boolean useSchemaProvider, List transforme orcProps.setProperty("include", "base.properties"); orcProps.setProperty("hoodie.embed.timeline.server", "false"); orcProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); - orcProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + orcProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); if (useSchemaProvider) { orcProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/" + "source.avsc"); if (transformerClassNames != null) { @@ -1476,7 +1549,7 @@ private void testORCDFSSource(boolean useSchemaProvider, List transforme transformerClassNames, PROPS_FILENAME_TEST_ORC, false, useSchemaProvider, 100000, false, null, null, "timestamp", null), jsc); deltaStreamer.sync(); - TestHelpers.assertRecordCount(ORC_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(ORC_NUM_RECORDS, tableBasePath, sqlContext); testNum++; } @@ -1487,7 +1560,7 @@ private void prepareJsonKafkaDFSSource(String propsFileName, String autoResetVal props.setProperty("include", "base.properties"); props.setProperty("hoodie.embed.timeline.server", "false"); props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); - props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + props.setProperty("hoodie.datasource.write.partitionpath.field", ""); props.setProperty("hoodie.deltastreamer.source.dfs.root", JSON_KAFKA_SOURCE_ROOT); props.setProperty("hoodie.deltastreamer.source.kafka.topic", topicName); props.setProperty("hoodie.deltastreamer.source.kafka.checkpoint.type", kafkaCheckpointType); @@ -1511,15 +1584,15 @@ private void testDeltaStreamerTransitionFromParquetToKafkaSource(boolean autoRes prepareParquetDFSFiles(parquetRecords, PARQUET_SOURCE_ROOT, FIRST_PARQUET_FILE_NAME, true, HoodieTestDataGenerator.TRIP_SCHEMA, 
HoodieTestDataGenerator.AVRO_TRIP_SCHEMA); prepareParquetDFSSource(true, false, "source_uber.avsc", "target_uber.avsc", PROPS_FILENAME_TEST_PARQUET, - PARQUET_SOURCE_ROOT, false); + PARQUET_SOURCE_ROOT, false, ""); // delta streamer w/ parquet source String tableBasePath = dfsBasePath + "/test_dfs_to_kafka" + testNum; HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer( TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, ParquetDFSSource.class.getName(), - Collections.EMPTY_LIST, PROPS_FILENAME_TEST_PARQUET, false, + Collections.emptyList(), PROPS_FILENAME_TEST_PARQUET, false, false, 100000, false, null, null, "timestamp", null), jsc); deltaStreamer.sync(); - TestHelpers.assertRecordCount(parquetRecords, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(parquetRecords, tableBasePath, sqlContext); deltaStreamer.shutdownGracefully(); // prep json kafka source @@ -1529,18 +1602,18 @@ private void testDeltaStreamerTransitionFromParquetToKafkaSource(boolean autoRes // delta streamer w/ json kafka source deltaStreamer = new HoodieDeltaStreamer( TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT, JsonKafkaSource.class.getName(), - Collections.EMPTY_LIST, PROPS_FILENAME_TEST_JSON_KAFKA, false, + Collections.emptyList(), PROPS_FILENAME_TEST_JSON_KAFKA, false, true, 100000, false, null, null, "timestamp", null), jsc); deltaStreamer.sync(); // if auto reset value is set to LATEST, this all kafka records so far may not be synced. int totalExpectedRecords = parquetRecords + ((autoResetToLatest) ? 0 : JSON_KAFKA_NUM_RECORDS); - TestHelpers.assertRecordCount(totalExpectedRecords, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(totalExpectedRecords, tableBasePath, sqlContext); // verify 2nd batch to test LATEST auto reset value. 
prepareJsonKafkaDFSFiles(20, false, topicName); totalExpectedRecords += 20; deltaStreamer.sync(); - TestHelpers.assertRecordCount(totalExpectedRecords, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(totalExpectedRecords, tableBasePath, sqlContext); testNum++; } @@ -1552,17 +1625,17 @@ public void testJsonKafkaDFSSource() throws Exception { String tableBasePath = dfsBasePath + "/test_json_kafka_table" + testNum; HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer( TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT, JsonKafkaSource.class.getName(), - Collections.EMPTY_LIST, PROPS_FILENAME_TEST_JSON_KAFKA, false, + Collections.emptyList(), PROPS_FILENAME_TEST_JSON_KAFKA, false, true, 100000, false, null, null, "timestamp", null), jsc); deltaStreamer.sync(); - TestHelpers.assertRecordCount(JSON_KAFKA_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(JSON_KAFKA_NUM_RECORDS, tableBasePath, sqlContext); int totalRecords = JSON_KAFKA_NUM_RECORDS; int records = 10; totalRecords += records; prepareJsonKafkaDFSFiles(records, false, topicName); deltaStreamer.sync(); - TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext); } @Test @@ -1574,20 +1647,20 @@ public void testKafkaTimestampType() throws Exception { String tableBasePath = dfsBasePath + "/test_json_kafka_table" + testNum; HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer( TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT, JsonKafkaSource.class.getName(), - Collections.EMPTY_LIST, PROPS_FILENAME_TEST_JSON_KAFKA, false, + Collections.emptyList(), PROPS_FILENAME_TEST_JSON_KAFKA, false, true, 100000, false, null, null, "timestamp", String.valueOf(System.currentTimeMillis())), jsc); deltaStreamer.sync(); - TestHelpers.assertRecordCount(JSON_KAFKA_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext); + 
TestHelpers.assertRecordCount(JSON_KAFKA_NUM_RECORDS, tableBasePath, sqlContext); prepareJsonKafkaDFSFiles(JSON_KAFKA_NUM_RECORDS, false, topicName); deltaStreamer = new HoodieDeltaStreamer( TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT, JsonKafkaSource.class.getName(), - Collections.EMPTY_LIST, PROPS_FILENAME_TEST_JSON_KAFKA, false, + Collections.emptyList(), PROPS_FILENAME_TEST_JSON_KAFKA, false, true, 100000, false, null, null, "timestamp", String.valueOf(System.currentTimeMillis())), jsc); deltaStreamer.sync(); - TestHelpers.assertRecordCount(JSON_KAFKA_NUM_RECORDS * 2, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(JSON_KAFKA_NUM_RECORDS * 2, tableBasePath, sqlContext); } @Test @@ -1610,6 +1683,7 @@ public void testParquetDFSSourceForEmptyBatch() throws Exception { testParquetDFSSource(false, null, true); } + @Disabled("HUDI-3707 To investigate problem with schema provider and transformer") @Test public void testParquetDFSSourceWithoutSchemaProviderAndTransformer() throws Exception { testParquetDFSSource(false, Collections.singletonList(TripsWithDistanceTransformer.class.getName())); @@ -1620,27 +1694,34 @@ public void testParquetDFSSourceWithSourceSchemaFileAndNoTransformer() throws Ex testParquetDFSSource(true, null); } + @Disabled("HUDI-3707 To investigate problem with schema provider and transformer") @Test public void testParquetDFSSourceWithSchemaFilesAndTransformer() throws Exception { testParquetDFSSource(true, Collections.singletonList(TripsWithDistanceTransformer.class.getName())); } - @ParameterizedTest - @MethodSource("testORCDFSSource") - public void testORCDFSSourceWithoutSchemaProviderAndNoTransformer(boolean useSchemaProvider, List transformerClassNames) throws Exception { - testORCDFSSource(useSchemaProvider, transformerClassNames); + @Test + public void testORCDFSSourceWithoutSchemaProviderAndNoTransformer() throws Exception { + testORCDFSSource(false, null); + } + + @Disabled("HUDI-3707 To 
investigate problem with schema provider and transformer") + @Test + public void testORCDFSSourceWithSchemaProviderAndWithTransformer() throws Exception { + testORCDFSSource(true, Collections.singletonList(TripsWithDistanceTransformer.class.getName())); } private void prepareCsvDFSSource( boolean hasHeader, char sep, boolean useSchemaProvider, boolean hasTransformer) throws IOException { String sourceRoot = dfsBasePath + "/csvFiles"; String recordKeyField = (hasHeader || useSchemaProvider) ? "_row_key" : "_c0"; + String partitionPath = (hasHeader || useSchemaProvider) ? "partition_path" : ""; // Properties used for testing delta-streamer with CSV source TypedProperties csvProps = new TypedProperties(); csvProps.setProperty("include", "base.properties"); csvProps.setProperty("hoodie.datasource.write.recordkey.field", recordKeyField); - csvProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + csvProps.setProperty("hoodie.datasource.write.partitionpath.field", partitionPath); if (useSchemaProvider) { csvProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source-flattened.avsc"); if (hasTransformer) { @@ -1681,7 +1762,7 @@ private void testCsvDFSSource( transformerClassNames, PROPS_FILENAME_TEST_CSV, false, useSchemaProvider, 1000, false, null, null, sourceOrderingField, null), jsc); deltaStreamer.sync(); - TestHelpers.assertRecordCount(CSV_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(CSV_NUM_RECORDS, tableBasePath, sqlContext); testNum++; } @@ -1719,6 +1800,7 @@ public void testCsvDFSSourceWithHeaderAndSepWithoutSchemaProviderAndWithTransfor testCsvDFSSource(true, '\t', false, Collections.singletonList(TripsWithDistanceTransformer.class.getName())); } + @Disabled("HUDI-3707 To investigate problem with schema provider and transformer") @Test public void testCsvDFSSourceWithHeaderAndSepWithSchemaProviderAndTransformer() throws Exception { // The CSV files have 
header, the columns are separated by '\t' @@ -1761,6 +1843,7 @@ public void testCsvDFSSourceNoHeaderWithoutSchemaProviderAndWithTransformer() th assertTrue(e.getMessage().contains("cannot resolve '`begin_lat`' given input columns:")); } + @Disabled("HUDI-3707 To investigate problem with schema provider and transformer") @Test public void testCsvDFSSourceNoHeaderWithSchemaProviderAndTransformer() throws Exception { // The CSV files do not have header, the columns are separated by '\t' @@ -1775,7 +1858,7 @@ private void prepareSqlSource() throws IOException { sqlSourceProps.setProperty("include", "base.properties"); sqlSourceProps.setProperty("hoodie.embed.timeline.server", "false"); sqlSourceProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); - sqlSourceProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + sqlSourceProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); sqlSourceProps.setProperty("hoodie.deltastreamer.source.sql.sql.query","select * from test_sql_table"); UtilitiesTestBase.Helpers.savePropsToDFS(sqlSourceProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SQL_SOURCE); @@ -1801,9 +1884,10 @@ public void testSqlSourceSource() throws Exception { Collections.emptyList(), PROPS_FILENAME_TEST_SQL_SOURCE, false, false, 1000, false, null, null, "timestamp", null, true), jsc); deltaStreamer.sync(); - TestHelpers.assertRecordCount(SQL_SOURCE_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(SQL_SOURCE_NUM_RECORDS, tableBasePath, sqlContext); } + @Disabled @Test public void testJdbcSourceIncrementalFetchInContinuousMode() { try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:test_mem", "test", "jdbc")) { @@ -1818,7 +1902,7 @@ public void testJdbcSourceIncrementalFetchInContinuousMode() { props.setProperty("hoodie.datasource.write.keygenerator.class", SimpleKeyGenerator.class.getName()); 
props.setProperty("hoodie.datasource.write.recordkey.field", "ID"); - props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/test-jdbc-source.properties"); @@ -1835,7 +1919,7 @@ public void testJdbcSourceIncrementalFetchInContinuousMode() { HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer(cfg, jsc); deltaStreamerTestRunner(deltaStreamer, cfg, (r) -> { TestHelpers.assertAtleastNCompactionCommits(numRecords / sourceLimit + ((numRecords % sourceLimit == 0) ? 0 : 1), tableBasePath, dfs); - TestHelpers.assertRecordCount(numRecords, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(numRecords, tableBasePath, sqlContext); return true; }); } catch (Exception e) { @@ -1857,7 +1941,7 @@ public void testHoodieIncrFallback() throws Exception { insertInTable(tableBasePath, 9, WriteOperationType.UPSERT); //No change as this fails with Path not exist error assertThrows(org.apache.spark.sql.AnalysisException.class, () -> new HoodieDeltaStreamer(downstreamCfg, jsc).sync()); - TestHelpers.assertRecordCount(1000, downstreamTableBasePath + "/*/*", sqlContext); + TestHelpers.assertRecordCount(1000, downstreamTableBasePath, sqlContext); if (downstreamCfg.configs == null) { downstreamCfg.configs = new ArrayList<>(); @@ -1870,8 +1954,8 @@ public void testHoodieIncrFallback() throws Exception { new HoodieDeltaStreamer(downstreamCfg, jsc).sync(); new HoodieDeltaStreamer(downstreamCfg, jsc).sync(); - long baseTableRecords = sqlContext.read().format("org.apache.hudi").load(tableBasePath + "/*/*.parquet").count(); - long downStreamTableRecords = sqlContext.read().format("org.apache.hudi").load(downstreamTableBasePath + "/*/*.parquet").count(); + long baseTableRecords = sqlContext.read().format("org.apache.hudi").load(tableBasePath).count(); + long downStreamTableRecords = 
sqlContext.read().format("org.apache.hudi").load(downstreamTableBasePath).count(); assertEquals(baseTableRecords, downStreamTableRecords); } @@ -1901,10 +1985,11 @@ public void testInsertOverwriteTable() throws Exception { testDeltaStreamerWithSpecifiedOperation(dfsBasePath + "/insert_overwrite_table", WriteOperationType.INSERT_OVERWRITE_TABLE); } + @Disabled("Local run passing; flaky in CI environment.") @Test public void testDeletePartitions() throws Exception { prepareParquetDFSSource(false, false, "source.avsc", "target.avsc", - PROPS_FILENAME_TEST_PARQUET, PARQUET_SOURCE_ROOT, false, "partition_path"); + PROPS_FILENAME_TEST_PARQUET, PARQUET_SOURCE_ROOT, false, ""); String tableBasePath = dfsBasePath + "/test_parquet_table" + testNum; HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer( TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, ParquetDFSSource.class.getName(), @@ -1930,8 +2015,8 @@ void testDeltaStreamerWithSpecifiedOperation(final String tableBasePath, WriteOp // Initial insert HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT); new HoodieDeltaStreamer(cfg, jsc).sync(); - TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1); // setting the operationType @@ -1939,14 +2024,14 @@ void testDeltaStreamerWithSpecifiedOperation(final String tableBasePath, WriteOp // No new data => no commits. 
cfg.sourceLimit = 0; new HoodieDeltaStreamer(cfg, jsc).sync(); - TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1); cfg.sourceLimit = 1000; new HoodieDeltaStreamer(cfg, jsc).sync(); - TestHelpers.assertRecordCount(1950, tableBasePath + "/*/*.parquet", sqlContext); - TestHelpers.assertDistanceCount(1950, tableBasePath + "/*/*.parquet", sqlContext); + TestHelpers.assertRecordCount(1950, tableBasePath, sqlContext); + TestHelpers.assertDistanceCount(1950, tableBasePath, sqlContext); TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java index e383236af18a3..13f5ad97cfc92 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java @@ -106,8 +106,8 @@ void testUpsertsContinuousModeWithMultipleWritersForConflicts(HoodieTableType ta } else { TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs()); } - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); - TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, 
tableBasePath, sqlContext()); return true; }); @@ -168,8 +168,8 @@ void testUpsertsContinuousModeWithMultipleWritersWithoutConflicts(HoodieTableTyp } else { TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs()); } - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); - TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath, sqlContext()); return true; }); @@ -236,8 +236,8 @@ void testLatestCheckpointCarryOverWithMultipleWriters(HoodieTableType tableType) } else { TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs()); } - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); - TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath, sqlContext()); return true; }); @@ -305,7 +305,7 @@ private static TypedProperties prepareMultiWriterProps(FileSystem fs, String bas props.setProperty("include", "sql-transformer.properties"); props.setProperty("hoodie.datasource.write.keygenerator.class", TestHoodieDeltaStreamer.TestGenerator.class.getName()); props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); - props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/source.avsc"); 
props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", basePath + "/target.avsc"); @@ -362,8 +362,8 @@ private void runJobsInParallel(String tableBasePath, HoodieTableType tableType, } else { TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommitsAfterCommit(3, lastSuccessfulCommit, tableBasePath, fs()); } - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); - TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath, sqlContext()); return true; }; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieMultiTableDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieMultiTableDeltaStreamer.java index da5c6cc66a2ff..cc2c96f2c8516 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieMultiTableDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieMultiTableDeltaStreamer.java @@ -34,6 +34,7 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import java.io.IOException; @@ -44,9 +45,10 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +@Tag("functional") public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBase { - private static volatile Logger log = LogManager.getLogger(TestHoodieMultiTableDeltaStreamer.class); + private static final Logger LOG = LogManager.getLogger(TestHoodieMultiTableDeltaStreamer.class); static class TestHelpers { @@ -80,7 +82,7 @@ 
public void testInvalidHiveSyncProps() throws IOException { Exception e = assertThrows(HoodieException.class, () -> { new HoodieMultiTableDeltaStreamer(cfg, jsc); }, "Should fail when hive sync table not provided with enableHiveSync flag"); - log.debug("Expected error when creating table execution objects", e); + LOG.debug("Expected error when creating table execution objects", e); assertTrue(e.getMessage().contains("Meta sync table field not provided!")); } @@ -90,7 +92,7 @@ public void testInvalidPropsFilePath() throws IOException { Exception e = assertThrows(IllegalArgumentException.class, () -> { new HoodieMultiTableDeltaStreamer(cfg, jsc); }, "Should fail when invalid props file is provided"); - log.debug("Expected error when creating table execution objects", e); + LOG.debug("Expected error when creating table execution objects", e); assertTrue(e.getMessage().contains("Please provide valid common config file path!")); } @@ -100,7 +102,7 @@ public void testInvalidTableConfigFilePath() throws IOException { Exception e = assertThrows(IllegalArgumentException.class, () -> { new HoodieMultiTableDeltaStreamer(cfg, jsc); }, "Should fail when invalid table config props file path is provided"); - log.debug("Expected error when creating table execution objects", e); + LOG.debug("Expected error when creating table execution objects", e); assertTrue(e.getMessage().contains("Please provide valid table config file path!")); } @@ -128,7 +130,7 @@ public void testInvalidIngestionProps() { HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null); new HoodieMultiTableDeltaStreamer(cfg, jsc); }, "Creation of execution object should fail without kafka topic"); - log.debug("Creation of execution object failed with error: " + e.getMessage(), e); + LOG.debug("Creation of execution object failed with error: " + e.getMessage(), e); assertTrue(e.getMessage().contains("Please 
provide valid table config arguments!")); } @@ -150,19 +152,21 @@ public void testMultiTableExecutionWithKafkaSource() throws IOException { TypedProperties properties = executionContexts.get(1).getProperties(); properties.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source_uber.avsc"); properties.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target_uber.avsc"); + properties.setProperty("hoodie.datasource.write.partitionpath.field", "timestamp"); properties.setProperty("hoodie.deltastreamer.source.kafka.topic", topicName2); executionContexts.get(1).setProperties(properties); TypedProperties properties1 = executionContexts.get(0).getProperties(); properties1.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source_short_trip_uber.avsc"); properties1.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target_short_trip_uber.avsc"); + properties1.setProperty("hoodie.datasource.write.partitionpath.field", "timestamp"); properties1.setProperty("hoodie.deltastreamer.source.kafka.topic", topicName1); executionContexts.get(0).setProperties(properties1); String targetBasePath1 = executionContexts.get(0).getConfig().targetBasePath; String targetBasePath2 = executionContexts.get(1).getConfig().targetBasePath; streamer.sync(); - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(5, targetBasePath1 + "/*/*.parquet", sqlContext); - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(10, targetBasePath2 + "/*/*.parquet", sqlContext); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(5, targetBasePath1, sqlContext); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(10, targetBasePath2, sqlContext); //insert updates for already existing records in kafka topics testUtils.sendMessages(topicName1, Helpers.jsonifyRecords(dataGenerator.generateUpdatesAsPerSchema("001", 5, HoodieTestDataGenerator.TRIP_SCHEMA))); @@ -177,8 
+181,8 @@ public void testMultiTableExecutionWithKafkaSource() throws IOException { assertTrue(streamer.getFailedTables().isEmpty()); //assert the record count matches now - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(5, targetBasePath1 + "/*/*.parquet", sqlContext); - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(10, targetBasePath2 + "/*/*.parquet", sqlContext); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(5, targetBasePath1, sqlContext); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(10, targetBasePath2, sqlContext); testNum++; } @@ -251,7 +255,7 @@ private TypedProperties getParquetProps(String parquetSourceRoot) { TypedProperties props = new TypedProperties(); props.setProperty("include", "base.properties"); props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); - props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); props.setProperty("hoodie.deltastreamer.source.dfs.root", parquetSourceRoot); return props; } @@ -271,7 +275,7 @@ private void ingestPerParquetSourceProps(List executionCo private void syncAndVerify(HoodieMultiTableDeltaStreamer streamer, String targetBasePath1, String targetBasePath2, long table1ExpectedRecords, long table2ExpectedRecords) { streamer.sync(); - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(table1ExpectedRecords, targetBasePath1 + "/*/*.parquet", sqlContext); - TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(table2ExpectedRecords, targetBasePath2 + "/*/*.parquet", sqlContext); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(table1ExpectedRecords, targetBasePath1, sqlContext); + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(table2ExpectedRecords, targetBasePath2, sqlContext); } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java index 8464740bf2bf0..cc93fe497563f 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java @@ -201,7 +201,7 @@ protected static HiveSyncConfig getHiveSyncConfig(String basePath, String tableN * * @throws IOException */ - private static void clearHiveDb() throws IOException { + private static void clearHiveDb() throws Exception { HiveConf hiveConf = new HiveConf(); // Create Dummy hive sync config HiveSyncConfig hiveSyncConfig = getHiveSyncConfig("/dummy", "dummy"); diff --git a/hudi-utilities/src/test/resources/delta-streamer-config/indexer.properties b/hudi-utilities/src/test/resources/delta-streamer-config/indexer.properties new file mode 100644 index 0000000000000..aa42f8fd27057 --- /dev/null +++ b/hudi-utilities/src/test/resources/delta-streamer-config/indexer.properties @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +hoodie.metadata.enable=true +hoodie.metadata.index.async=true +hoodie.metadata.index.column.stats.enable=true +hoodie.metadata.index.check.timeout.seconds=60 +hoodie.write.concurrency.mode=optimistic_concurrency_control +hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider \ No newline at end of file diff --git a/packaging/hudi-datahub-sync-bundle/pom.xml b/packaging/hudi-datahub-sync-bundle/pom.xml new file mode 100644 index 0000000000000..d45f07f31c23f --- /dev/null +++ b/packaging/hudi-datahub-sync-bundle/pom.xml @@ -0,0 +1,148 @@ + + + + + hudi + org.apache.hudi + 0.12.0-SNAPSHOT + ../../pom.xml + + 4.0.0 + hudi-datahub-sync-bundle + jar + + + true + ${project.parent.basedir} + + + + + + org.apache.rat + apache-rat-plugin + + + org.apache.maven.plugins + maven-shade-plugin + ${maven-shade-plugin.version} + + + package + + shade + + + ${shadeSources} + ${project.build.directory}/dependency-reduced-pom.xml + + + + + + true + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + + + org.apache.hudi:hudi-common + org.apache.hudi:hudi-hadoop-mr + org.apache.hudi:hudi-sync-common + org.apache.hudi:hudi-datahub-sync + + io.acryl:datahub-client + com.beust:jcommander + org.apache.httpcomponents:fluent-hc + org.apache.httpcomponents:httpcore + org.apache.httpcomponents:httpclient + org.apache.httpcomponents:httpasyncclient + org.apache.httpcomponents:httpcore-nio + + + false + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + META-INF/services/javax.* + + + + ${project.artifactId}-${project.version} + + + + + + + + src/main/resources + + + src/test/resources + + + + + + + + org.apache.hudi + hudi-common + ${project.version} + + + + org.apache.hudi + hudi-hadoop-mr-bundle + ${project.version} + + + + org.apache.hudi + hudi-datahub-sync + ${project.version} + + + + + org.apache.parquet + parquet-avro + ${parquet.version} + compile + + + + + org.apache.avro + avro + ${avro.version} + compile + + + + diff --git 
a/packaging/hudi-datahub-sync-bundle/src/main/java/org/apache/hudi/datahub/bundle/Main.java b/packaging/hudi-datahub-sync-bundle/src/main/java/org/apache/hudi/datahub/bundle/Main.java new file mode 100644 index 0000000000000..ab862f33be42f --- /dev/null +++ b/packaging/hudi-datahub-sync-bundle/src/main/java/org/apache/hudi/datahub/bundle/Main.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.datahub.bundle; + +import org.apache.hudi.common.util.ReflectionUtils; + +/** + * A simple main class to dump all classes loaded in current classpath. + * + * This is a workaround for generating sources and javadoc jars for packaging modules. The maven plugins for generating + * javadoc and sources plugins do not generate corresponding jars if there are no source files. + * + * This class does not have anything to do with Hudi but is there to keep mvn javadocs/source plugin happy. 
+ */ +public class Main { + + public static void main(String[] args) { + ReflectionUtils.getTopLevelClassesInClasspath(Main.class).forEach(System.out::println); + } +} diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index 30ee37a4ecf2f..584c3871cd449 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -21,7 +21,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -34,8 +34,8 @@ provided org.apache.hudi. 3.1.0 - - 1.11.1 + + ${flink.format.parquet.version} 2.3.1 0.9.3 @@ -70,6 +70,7 @@ META-INF/LICENSE target/classes/META-INF/LICENSE + @@ -138,7 +139,7 @@ org.apache.hive:hive-service org.apache.hive:hive-service-rpc org.apache.hive:hive-exec - org.apache.hive:hive-standalone-metastore + org.apache.hive:hive-standalone-metastore org.apache.hive:hive-metastore org.apache.hive:hive-jdbc org.datanucleus:datanucleus-core @@ -148,10 +149,18 @@ org.apache.hbase:hbase-common org.apache.hbase:hbase-client + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api org.apache.hbase:hbase-server - org.apache.hbase:hbase-protocol - org.apache.htrace:htrace-core + org.apache.hbase:hbase-protocol-shaded + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 commons-codec:commons-codec + commons-io:commons-io @@ -163,6 +172,25 @@ org.apache.avro. ${flink.bundle.shade.prefix}org.apache.avro. + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. + com.yammer.metrics. 
${flink.bundle.shade.prefix}com.yammer.metrics. @@ -192,6 +220,74 @@ com.fasterxml.jackson. ${flink.bundle.shade.prefix}com.fasterxml.jackson. + + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + @@ -201,6 +297,8 @@ META-INF/*.DSA META-INF/*.RSA META-INF/services/javax.* + **/*.proto + hbase-webapps/** @@ -273,11 +371,23 @@ org.apache.hudi hudi-hadoop-mr ${project.version} + + + guava + com.google.guava + + 
org.apache.hudi hudi-hive-sync ${project.version} + + + guava + com.google.guava + + org.apache.hudi @@ -288,6 +398,10 @@ rocksdbjni org.rocksdb + + guava + com.google.guava + @@ -551,66 +665,6 @@ jackson-annotations compile - - - - org.apache.hbase - hbase-common - ${hbase.version} - - - guava - com.google.guava - - - - - org.apache.hbase - hbase-server - ${hbase.version} - compile - - - guava - com.google.guava - - - org.apache.hbase - hbase-common - - - javax.servlet - * - - - org.codehaus.jackson - * - - - org.mortbay.jetty - * - - - tomcat - * - - - - - org.apache.hbase - hbase-client - ${hbase.version} - - - org.apache.hbase - hbase-protocol - ${hbase.version} - - - org.apache.htrace - htrace-core - ${htrace.version} - diff --git a/packaging/hudi-gcp-bundle/pom.xml b/packaging/hudi-gcp-bundle/pom.xml new file mode 100644 index 0000000000000..7121076f271b4 --- /dev/null +++ b/packaging/hudi-gcp-bundle/pom.xml @@ -0,0 +1,178 @@ + + + + + hudi + org.apache.hudi + 0.12.0-SNAPSHOT + ../../pom.xml + + 4.0.0 + hudi-gcp-bundle + jar + + + true + ${project.parent.basedir} + + + + + com.google.cloud + libraries-bom + 25.1.0 + pom + import + + + + + + + org.apache.rat + apache-rat-plugin + + + maven-assembly-plugin + + + + org.apache.hudi.gcp.bigquery.BigQuerySyncTool + + + + jar-with-dependencies + + + + + org.apache.maven.plugins + maven-shade-plugin + ${maven-shade-plugin.version} + + + package + + shade + + + ${shadeSources} + ${project.build.directory}/dependency-reduced-pom.xml + + + + + + true + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + + + org.apache.hudi:hudi-common + org.apache.hudi:hudi-hadoop-mr + org.apache.hudi:hudi-sync-common + org.apache.hudi:hudi-gcp + + com.google.cloud:google-cloud-bigquery + com.beust:jcommander + + + false + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + META-INF/services/javax.* + + + + ${project.artifactId}-${project.version} + + + + + + + + src/main/resources + + + src/test/resources + + + + + + 
+ + org.apache.hudi + hudi-common + ${project.version} + + + + org.apache.hudi + hudi-hadoop-mr-bundle + ${project.version} + + + + org.apache.hudi + hudi-sync-common + ${project.version} + + + + org.apache.hudi + hudi-gcp + ${project.version} + + + + com.google.cloud + google-cloud-bigquery + + + + + org.apache.parquet + parquet-avro + ${parquet.version} + compile + + + + + org.apache.avro + avro + ${avro.version} + compile + + + + diff --git a/packaging/hudi-gcp-bundle/src/main/java/org/apache/hudi/gcp/bigquery/bundle/Main.java b/packaging/hudi-gcp-bundle/src/main/java/org/apache/hudi/gcp/bigquery/bundle/Main.java new file mode 100644 index 0000000000000..75324f64fdea7 --- /dev/null +++ b/packaging/hudi-gcp-bundle/src/main/java/org/apache/hudi/gcp/bigquery/bundle/Main.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.gcp.bigquery.bundle; + +import org.apache.hudi.common.util.ReflectionUtils; + +/** + * A simple main class to dump all classes loaded in current classpath. + * + * This is a workaround for generating sources and javadoc jars for packaging modules. 
The maven plugins for generating + * javadoc and sources plugins do not generate corresponding jars if there are no source files. + * + * This class does not have anything to do with Hudi but is there to keep mvn javadocs/source plugin happy. + */ +public class Main { + + public static void main(String[] args) { + ReflectionUtils.getTopLevelClassesInClasspath(Main.class).forEach(System.out::println); + } +} diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index f6215b1e017a5..612a1b7f30b8b 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -55,12 +55,13 @@ - true + true META-INF/LICENSE target/classes/META-INF/LICENSE + @@ -74,11 +75,19 @@ com.esotericsoftware:minlog org.apache.hbase:hbase-common org.apache.hbase:hbase-client - org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server - org.apache.htrace:htrace-core + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 com.yammer.metrics:metrics-core com.google.guava:guava + commons-io:commons-io @@ -102,6 +111,25 @@ org.apache.avro. org.apache.hudi.org.apache.avro. + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. + org.apache.parquet.avro. org.apache.hudi.org.apache.parquet.avro. @@ -110,6 +138,74 @@ com.google.common. 
org.apache.hudi.com.google.common. + + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + false @@ -120,6 +216,8 @@ META-INF/*.DSA META-INF/*.RSA META-INF/services/javax.* + **/*.proto + hbase-webapps/** @@ -167,48 +265,5 @@ ${avro.version} compile - - - org.apache.htrace - htrace-core - ${htrace.version} - compile - - - - - org.apache.hbase - hbase-common - ${hbase.version} - - - - org.apache.hbase - 
hbase-server - ${hbase.version} - compile - - - org.apache.hbase - hbase-common - - - javax.servlet - * - - - org.codehaus.jackson - * - - - org.mortbay.jetty - * - - - tomcat - * - - - diff --git a/packaging/hudi-hive-sync-bundle/pom.xml b/packaging/hudi-hive-sync-bundle/pom.xml index 75fce574eb3d6..dd40a8b5177c5 100644 --- a/packaging/hudi-hive-sync-bundle/pom.xml +++ b/packaging/hudi-hive-sync-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -55,12 +55,13 @@ - true + true META-INF/LICENSE target/classes/META-INF/LICENSE + @@ -71,17 +72,129 @@ com.beust:jcommander org.apache.avro:avro + org.apache.hbase:hbase-common + org.apache.hbase:hbase-client + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol-shaded + org.apache.hbase:hbase-server + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 org.apache.parquet:parquet-avro com.esotericsoftware:kryo-shaded org.objenesis:objenesis com.esotericsoftware:minlog + commons-io:commons-io + + com.esotericsoftware.kryo. + org.apache.hudi.com.esotericsoftware.kryo. + + + org.objenesis. + org.apache.hudi.org.objenesis. + + + com.esotericsoftware.minlog. + org.apache.hudi.com.esotericsoftware.minlog. + org.apache.avro. org.apache.hudi.org.apache.avro. + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. 
+ + + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + false @@ -92,6 +205,8 @@ META-INF/*.DSA META-INF/*.RSA META-INF/services/javax.* + **/*.proto + hbase-webapps/** diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml index b53e02aaf7768..ce18681fc2d81 100644 --- a/packaging/hudi-integ-test-bundle/pom.xml +++ b/packaging/hudi-integ-test-bundle/pom.xml @@ -17,7 
+17,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -62,6 +62,7 @@ META-INF/services/org.apache.spark.sql.sources.DataSourceRegister + @@ -85,6 +86,20 @@ org.apache.hudi:hudi-aws org.apache.hudi:hudi-integ-test + org.apache.hbase:hbase-common + org.apache.hbase:hbase-client + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol-shaded + org.apache.hbase:hbase-server + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 + commons-io:commons-io + org.jetbrains.kotlin:kotlin-stdlib-jdk8 org.jetbrains.kotlin:kotlin-stdlib org.jetbrains.kotlin:kotlin-stdlib-common @@ -109,6 +124,7 @@ org.mortbay.jetty:jetty-util org.rocksdb:rocksdbjni + com.github.ben-manes.caffeine:caffeine com.beust:jcommander com.twitter:bijection-avro_${scala.binary.version} com.twitter:bijection-core_${scala.binary.version} @@ -133,7 +149,6 @@ org.apache.hive:hive-common org.apache.hive:hive-service - org.apache.hive:hive-metastore org.apache.hive:hive-jdbc org.apache.hive:hive-exec @@ -156,13 +171,19 @@ com.fasterxml.jackson.core:jackson-databind com.fasterxml.jackson.dataformat:jackson-dataformat-yaml - org.apache.htrace:htrace-core org.apache.curator:curator-framework org.apache.curator:curator-client org.apache.curator:curator-recipes + + + org.apache.spark.sql.avro. + org.apache.hudi.org.apache.spark.sql.avro. + com.beust.jcommander. org.apache.hudi.com.beust.jcommander. @@ -179,6 +200,25 @@ org.apache.commons.pool. org.apache.hudi.org.apache.commons.pool. + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. 
+ org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. + org.apache.hive.jdbc. org.apache.hudi.org.apache.hive.jdbc. @@ -259,6 +299,74 @@ org.apache.parquet.avro. org.apache.hudi.org.apache.parquet.avro. + + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + @@ -270,6 +378,8 @@ META-INF/NOTICE* META-INF/LICENSE* + **/*.proto + hbase-webapps/** @@ -398,6 +508,12 @@ 
hive-metastore ${hive.version} provided + + + org.apache.hbase + * + + diff --git a/packaging/hudi-kafka-connect-bundle/pom.xml b/packaging/hudi-kafka-connect-bundle/pom.xml index f66bc7f051e48..6adeae0fc2715 100644 --- a/packaging/hudi-kafka-connect-bundle/pom.xml +++ b/packaging/hudi-kafka-connect-bundle/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -58,14 +58,16 @@ implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"> + implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"> true + implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> META-INF/LICENSE target/classes/META-INF/LICENSE + @@ -115,13 +117,21 @@ org.objenesis:objenesis com.esotericsoftware:kryo-shaded com.esotericsoftware:minlog - + org.apache.hbase:hbase-client org.apache.hbase:hbase-common - org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server - org.apache.htrace:htrace-core + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 org.scala-lang:* + commons-io:commons-io @@ -131,15 +141,107 @@ com.yammer.metrics. - ${kafka.connect.bundle.shade.prefix}com.yammer.metrics. + ${kafka.connect.bundle.shade.prefix}com.yammer.metrics. + com.beust.jcommander. - ${kafka.connect.bundle.shade.prefix}com.beust.jcommander. + ${kafka.connect.bundle.shade.prefix}com.beust.jcommander. + org.eclipse.jetty. - ${kafka.connect.bundle.shade.prefix}org.eclipse.jetty. + ${kafka.connect.bundle.shade.prefix}org.eclipse.jetty. + + + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. 
+ org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. + + + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + @@ -150,6 +252,8 @@ META-INF/*.DSA META-INF/*.RSA META-INF/services/javax.* + **/*.proto + hbase-webapps/** @@ -322,13 +426,6 @@ 
${utilities.bundle.hive.scope} - - org.apache.htrace - htrace-core - ${htrace.version} - compile - - diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml index 90c1087dcb4d2..ada62cfee47c7 100644 --- a/packaging/hudi-presto-bundle/pom.xml +++ b/packaging/hudi-presto-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -61,6 +61,7 @@ META-INF/LICENSE target/classes/META-INF/LICENSE + @@ -75,20 +76,51 @@ com.esotericsoftware:minlog org.apache.hbase:hbase-common org.apache.hbase:hbase-client + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api org.apache.hbase:hbase-protocol - org.apache.hbase:hbase-server - org.apache.htrace:htrace-core + org.apache.hbase:hbase-protocol-shaded + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 com.yammer.metrics:metrics-core com.google.guava:guava + commons-io:commons-io commons-lang:commons-lang com.google.protobuf:protobuf-java + + org.apache.parquet.avro. + org.apache.hudi.org.apache.parquet.avro. + org.apache.avro. org.apache.hudi.org.apache.avro. + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. + org.codehaus.jackson. org.apache.hudi.org.codehaus.jackson. @@ -121,14 +153,78 @@ com.google.protobuf. ${presto.bundle.bootstrap.shade.prefix}com.google.protobuf. - - org.apache.htrace. - ${presto.bundle.bootstrap.shade.prefix}org.apache.htrace. - org.apache.parquet.avro. ${presto.bundle.bootstrap.shade.prefix}org.apache.parquet.avro. 
+ + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + false @@ -139,7 +235,9 @@ META-INF/*.DSA META-INF/*.RSA META-INF/services/javax.* + **/*.proto com/esotericsoftware/reflectasm/** + hbase-webapps/** stringBehavior.avsc @@ -171,20 +269,6 @@ org.apache.hudi hudi-hadoop-mr-bundle ${project.version} - - - org.apache.hbase - hbase-common - - - org.apache.hbase - hbase-server - - - org.apache.hbase - 
hbase-client - - @@ -201,42 +285,6 @@ compile - - - org.apache.hbase - hbase-common - ${hbase.version} - - - - org.apache.hbase - hbase-server - ${hbase.version} - compile - - - org.apache.hbase - hbase-common - - - javax.servlet - * - - - org.codehaus.jackson - * - - - org.mortbay.jetty - * - - - tomcat - * - - - - diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index a877d10a586a8..698cc534d0807 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -63,6 +63,7 @@ META-INF/services/org.apache.spark.sql.sources.DataSourceRegister + @@ -94,6 +95,7 @@ com.github.davidmoten:guava-mini com.github.davidmoten:hilbert-curve + com.github.ben-manes.caffeine:caffeine com.twitter:bijection-avro_${scala.binary.version} com.twitter:bijection-core_${scala.binary.version} io.dropwizard.metrics:metrics-core @@ -107,7 +109,6 @@ com.yammer.metrics:metrics-core com.google.guava:guava - org.apache.spark:spark-avro_${scala.binary.version} org.apache.hive:hive-common org.apache.hive:hive-service org.apache.hive:hive-service-rpc @@ -116,16 +117,31 @@ org.apache.hbase:hbase-client org.apache.hbase:hbase-common - org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server - org.apache.htrace:htrace-core + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 org.apache.curator:curator-framework org.apache.curator:curator-client org.apache.curator:curator-recipes commons-codec:commons-codec + commons-io:commons-io + + + org.apache.spark.sql.avro. + org.apache.hudi.org.apache.spark.sql.avro. 
+ com.yammer.metrics. org.apache.hudi.com.yammer.metrics. @@ -135,8 +151,23 @@ org.apache.hudi.com.beust.jcommander. - org.apache.spark.sql.avro. - ${spark.bundle.spark.shade.prefix}org.apache.spark.sql.avro. + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. org.apache.hive.jdbc. @@ -180,9 +211,77 @@ com.google.common. - ${spark.bundle.spark.shade.prefix}com.google.common. + org.apache.hudi.com.google.common. + + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + 
org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + @@ -192,6 +291,8 @@ META-INF/*.DSA META-INF/*.RSA META-INF/services/javax.* + **/*.proto + hbase-webapps/** @@ -264,13 +365,6 @@ ${project.version} - - - org.apache.spark - spark-avro_${scala.binary.version} - ${spark.bundle.avro.scope} - - org.apache.parquet @@ -314,58 +408,6 @@ ${spark.bundle.hive.scope} - - org.apache.htrace - htrace-core - ${htrace.version} - compile - - - - - org.apache.hbase - hbase-common - ${hbase.version} - - - org.apache.hbase - hbase-server - ${hbase.version} - compile - - - org.apache.hbase - hbase-common - - - javax.servlet - * - - - org.codehaus.jackson - * - - - org.mortbay.jetty - * - - - tomcat - * - - - - - org.apache.hbase - hbase-client - ${hbase.version} - - - org.apache.hbase - hbase-protocol - ${hbase.version} - - org.apache.curator @@ -395,12 +437,5 @@ org.apache.hudi. 
- - spark-shade-unbundle-avro - - provided - - - diff --git a/packaging/hudi-timeline-server-bundle/pom.xml b/packaging/hudi-timeline-server-bundle/pom.xml index 18f7c9665a20f..5fcf7a130c888 100644 --- a/packaging/hudi-timeline-server-bundle/pom.xml +++ b/packaging/hudi-timeline-server-bundle/pom.xml @@ -21,7 +21,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -155,6 +155,8 @@ META-INF/*.DSA META-INF/*.RSA META-INF/services/javax.* + **/*.proto + hbase-webapps/** @@ -198,17 +200,114 @@ com.fasterxml.jackson.core:jackson-annotations com.fasterxml.jackson.core:jackson-core com.fasterxml.jackson.core:jackson-databind - org.apache.htrace:htrace-core org.apache.hbase:hbase-common org.apache.hbase:hbase-client - org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 com.esotericsoftware:kryo-shaded com.esotericsoftware:minlog + commons-io:commons-io log4j:log4j org.objenesis:objenesis + + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. 
+ + + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + + diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml index adf73f1bb0b83..5052038507a8a 100644 --- a/packaging/hudi-trino-bundle/pom.xml +++ b/packaging/hudi-trino-bundle/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -62,6 +62,7 @@ META-INF/LICENSE 
target/classes/META-INF/LICENSE + @@ -76,22 +77,52 @@ com.esotericsoftware:minlog org.apache.hbase:hbase-common org.apache.hbase:hbase-client - org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server org.apache.hbase:hbase-annotations - org.apache.htrace:htrace-core + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.htrace:htrace-core4 com.yammer.metrics:metrics-core com.google.guava:guava commons-lang:commons-lang + commons-io:commons-io com.google.protobuf:protobuf-java - + + org.apache.parquet.avro. + org.apache.hudi.org.apache.parquet.avro. + org.apache.avro. org.apache.hudi.org.apache.avro. + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. + org.codehaus.jackson. org.apache.hudi.org.codehaus.jackson. @@ -124,6 +155,74 @@ com.google.protobuf. ${trino.bundle.bootstrap.shade.prefix}com.google.protobuf. 
+ + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + false @@ -134,6 +233,8 @@ META-INF/*.DSA META-INF/*.RSA META-INF/services/javax.* + **/*.proto + hbase-webapps/** @@ -155,71 +256,10 @@ - - org.apache.hudi - hudi-common - ${project.version} - - - org.apache.hbase - hbase-server - - - org.apache.hbase - hbase-client - - - org.apache.hudi hudi-hadoop-mr-bundle ${project.version} - - - org.apache.hbase - 
hbase-server - - - org.apache.hbase - hbase-client - - - - - - - org.apache.hbase - hbase-common - ${hbase.version} - - - - org.apache.hbase - hbase-server - ${hbase.version} - compile - - - org.apache.hbase - hbase-common - - - javax.servlet - * - - - org.codehaus.jackson - * - - - org.mortbay.jetty - * - - - tomcat - * - - diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index 0685baee0a9ff..a18808678b636 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -86,6 +86,7 @@ META-INF/services/org.apache.spark.sql.sources.DataSourceRegister + @@ -119,6 +120,7 @@ com.github.davidmoten:guava-mini com.github.davidmoten:hilbert-curve + com.github.ben-manes.caffeine:caffeine com.twitter:bijection-avro_${scala.binary.version} com.twitter:bijection-core_${scala.binary.version} io.confluent:kafka-avro-serializer @@ -149,16 +151,31 @@ org.apache.hbase:hbase-client org.apache.hbase:hbase-common - org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server - org.apache.htrace:htrace-core + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 org.apache.curator:curator-framework org.apache.curator:curator-client org.apache.curator:curator-recipes commons-codec:commons-codec + commons-io:commons-io + + + org.apache.spark.sql.avro. + org.apache.hudi.org.apache.spark.sql.avro. + com.yammer.metrics. org.apache.hudi.com.yammer.metrics. @@ -171,6 +188,25 @@ org.apache.hive.jdbc. ${utilities.bundle.hive.shade.prefix}org.apache.hive.jdbc. + + org.apache.commons.io. 
+ org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. + org.apache.hadoop.hive.metastore. ${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.metastore. @@ -207,6 +243,74 @@ org.eclipse.jetty. org.apache.hudi.org.eclipse.jetty. + + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + 
org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + @@ -216,6 +320,8 @@ META-INF/*.DSA META-INF/*.RSA META-INF/services/javax.* + **/*.proto + hbase-webapps/** @@ -339,51 +445,6 @@ compile - - - org.apache.hbase - hbase-common - ${hbase.version} - - - org.apache.hbase - hbase-server - ${hbase.version} - compile - - - org.apache.hbase - hbase-common - - - javax.servlet - * - - - org.codehaus.jackson - * - - - org.mortbay.jetty - * - - - tomcat - * - - - - - org.apache.hbase - hbase-client - ${hbase.version} - - - org.apache.hbase - hbase-protocol - ${hbase.version} - - org.apache.curator diff --git a/packaging/hudi-utilities-slim-bundle/README.md b/packaging/hudi-utilities-slim-bundle/README.md new file mode 100644 index 0000000000000..58353c403d325 --- /dev/null +++ b/packaging/hudi-utilities-slim-bundle/README.md @@ -0,0 +1,22 @@ + + +# Usage of hudi-utilities-slim-bundle + +Starting from versions 0.11, Hudi provides hudi-utilities-slim-bundle which excludes hudi-spark-datasource modules. +This new bundle is intended to be used with Hudi Spark bundle together, if using hudi-utilities-bundle solely +introduces problems for a specific Spark version. 
\ No newline at end of file diff --git a/packaging/hudi-utilities-slim-bundle/pom.xml b/packaging/hudi-utilities-slim-bundle/pom.xml new file mode 100644 index 0000000000000..0803b1f143662 --- /dev/null +++ b/packaging/hudi-utilities-slim-bundle/pom.xml @@ -0,0 +1,477 @@ + + + + + hudi + org.apache.hudi + 0.12.0-SNAPSHOT + ../../pom.xml + + 4.0.0 + hudi-utilities-slim-bundle_${scala.binary.version} + jar + + + true + ${project.parent.basedir} + + + + + + org.apache.rat + apache-rat-plugin + + false + 0 + + NOTICE + DISCLAIMER + **/.* + **/*.json + **/*.log + **/*.sqltemplate + **/compose_env + **/*NOTICE* + **/*LICENSE* + **/dependency-reduced-pom.xml + **/test/resources/*.data + **/test/resources/*.commit + **/target/** + **/generated-sources/** + .github/** + **/*.sql + + + + + org.apache.maven.plugins + maven-shade-plugin + ${maven-shade-plugin.version} + + + package + + shade + + + ${shadeSources} + ${project.build.directory}/dependency-reduced-pom.xml + + + + + + true + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + META-INF/services/org.apache.spark.sql.sources.DataSourceRegister + + + + + + org.apache.hudi:hudi-common + org.apache.hudi:hudi-client-common + org.apache.hudi:hudi-spark-client + org.apache.hudi:hudi-utilities_${scala.binary.version} + org.apache.hudi:hudi-hive-sync + org.apache.hudi:hudi-sync-common + org.apache.hudi:hudi-hadoop-mr + org.apache.hudi:hudi-timeline-service + org.apache.hudi:hudi-aws + + com.yammer.metrics:metrics-core + com.beust:jcommander + io.javalin:javalin + + org.eclipse.jetty:* + org.eclipse.jetty.websocket:* + org.jetbrains.kotlin:* + org.rocksdb:rocksdbjni + org.apache.httpcomponents:httpclient + org.apache.httpcomponents:httpcore + org.apache.httpcomponents:fluent-hc + org.antlr:stringtemplate + org.apache.parquet:parquet-avro + + com.github.davidmoten:guava-mini + com.github.davidmoten:hilbert-curve + com.twitter:bijection-avro_${scala.binary.version} + com.twitter:bijection-core_${scala.binary.version} + 
io.confluent:kafka-avro-serializer + io.confluent:kafka-schema-serializer + io.confluent:common-config + io.confluent:common-utils + io.confluent:kafka-schema-registry-client + io.dropwizard.metrics:metrics-core + io.dropwizard.metrics:metrics-graphite + io.dropwizard.metrics:metrics-jmx + io.prometheus:simpleclient + io.prometheus:simpleclient_httpserver + io.prometheus:simpleclient_dropwizard + io.prometheus:simpleclient_pushgateway + io.prometheus:simpleclient_common + com.yammer.metrics:metrics-core + org.apache.spark:spark-streaming-kafka-0-10_${scala.binary.version} + org.apache.spark:spark-token-provider-kafka-0-10_${scala.binary.version} + org.apache.kafka:kafka_${scala.binary.version} + com.101tec:zkclient + org.apache.kafka:kafka-clients + + org.apache.hive:hive-common + org.apache.hive:hive-service + org.apache.hive:hive-service-rpc + org.apache.hive:hive-metastore + org.apache.hive:hive-jdbc + + org.apache.hbase:hbase-client + org.apache.hbase:hbase-common + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol-shaded + org.apache.hbase:hbase-server + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 + org.apache.curator:curator-framework + org.apache.curator:curator-client + org.apache.curator:curator-recipes + commons-codec:commons-codec + commons-io:commons-io + + + + + + org.apache.spark.sql.avro. + org.apache.hudi.org.apache.spark.sql.avro. + + + com.yammer.metrics. + org.apache.hudi.com.yammer.metrics. + + + com.beust.jcommander. + org.apache.hudi.com.beust.jcommander. + + + org.apache.hive.jdbc. + ${utilities.bundle.hive.shade.prefix}org.apache.hive.jdbc. + + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + org.apache.hadoop.hbase. 
+ org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + + + + org.apache.hbase. + org.apache.hudi.org.apache.hbase. + + + org.apache.htrace. + org.apache.hudi.org.apache.htrace. + + + org.apache.hadoop.hive.metastore. + ${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.metastore. + + + org.apache.hive.common. + ${utilities.bundle.hive.shade.prefix}org.apache.hive.common. + + + org.apache.hadoop.hive.common. + ${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.common. + + + org.apache.hadoop.hive.conf. + ${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.conf. + + + org.apache.hive.service. + ${utilities.bundle.hive.shade.prefix}org.apache.hive.service. + + + org.apache.hadoop.hive.service. + ${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.service. + + + com.codahale.metrics. + org.apache.hudi.com.codahale.metrics. + + + org.apache.commons.codec. + org.apache.hudi.org.apache.commons.codec. + + + org.eclipse.jetty. + org.apache.hudi.org.eclipse.jetty. 
+ + + + org.apache.hadoop.metrics2.MetricHistogram + org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram + + + + org.apache.hadoop.metrics2.MetricsExecutor + org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor + + + + org.apache.hadoop.metrics2.impl.JmxCacheBuster + org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster + + + org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper + + + + org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry + + + + org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl + + + + org.apache.hadoop.metrics2.lib.MutableFastCounter + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter + + + + org.apache.hadoop.metrics2.lib.MutableHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram + + + + org.apache.hadoop.metrics2.lib.MutableRangeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableSizeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram + + + + org.apache.hadoop.metrics2.lib.MutableTimeHistogram + org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram + + + + org.apache.hadoop.metrics2.util.MetricQuantile + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile + + + + org.apache.hadoop.metrics2.util.MetricSampleQuantiles + org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + META-INF/services/javax.* + **/*.proto + hbase-webapps/** + + + + + + + + + + + + src/main/resources + + + src/test/resources + + + + + + + + org.apache.hudi + hudi-common + ${project.version} + + + org.apache.hudi + hudi-client-common + ${project.version} + + + org.apache.hudi + 
hudi-spark-client + ${project.version} + + + org.apache.hudi + hudi-hive-sync + ${project.version} + + + javax.servlet + servlet-api + + + + + org.apache.hudi + hudi-spark-common_${scala.binary.version} + ${project.version} + provided + + + org.apache.hudi + hudi-spark_${scala.binary.version} + ${project.version} + provided + + + org.apache.hudi + ${hudi.spark.module}_${scala.binary.version} + ${project.version} + provided + + + org.apache.hudi + ${hudi.spark.common.module} + ${project.version} + provided + + + org.apache.hudi + hudi-utilities_${scala.binary.version} + ${project.version} + + + + + org.apache.parquet + parquet-avro + compile + + + + + ${hive.groupid} + hive-service + ${hive.version} + ${utilities.bundle.hive.scope} + + + + ${hive.groupid} + hive-service-rpc + ${hive.version} + ${utilities.bundle.hive.scope} + + + + ${hive.groupid} + hive-jdbc + ${hive.version} + ${utilities.bundle.hive.scope} + + + + ${hive.groupid} + hive-metastore + ${hive.version} + ${utilities.bundle.hive.scope} + + + + ${hive.groupid} + hive-common + ${hive.version} + ${utilities.bundle.hive.scope} + + + + org.apache.htrace + htrace-core + ${htrace.version} + compile + + + + + org.apache.curator + curator-framework + ${zk-curator.version} + + + + org.apache.curator + curator-client + ${zk-curator.version} + + + + org.apache.curator + curator-recipes + ${zk-curator.version} + + + + + + utilities-bundle-shade-hive + + compile + org.apache.hudi. + + + + + diff --git a/packaging/hudi-utilities-slim-bundle/src/main/java/org/apache/hudi/utilities/bundle/Main.java b/packaging/hudi-utilities-slim-bundle/src/main/java/org/apache/hudi/utilities/bundle/Main.java new file mode 100644 index 0000000000000..a4e2dcb34f647 --- /dev/null +++ b/packaging/hudi-utilities-slim-bundle/src/main/java/org/apache/hudi/utilities/bundle/Main.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities.bundle; + +import org.apache.hudi.common.util.ReflectionUtils; + +/** + * A simple main class to dump all classes loaded in current classpath + * + * This is a workaround for generating sources and javadoc jars for packaging modules. The maven plugins for generating + * javadoc and sources plugins do not generate corresponding jars if there are no source files. + * + * This class does not have anything to do with Hudi but is there to keep mvn javadocs/source plugin happy. 
+ */ +public class Main { + + public static void main(String[] args) { + ReflectionUtils.getTopLevelClassesInClasspath(Main.class).forEach(System.out::println); + } +} diff --git a/pom.xml b/pom.xml index c61d5ef8f3a75..570259b65d2a3 100644 --- a/pom.xml +++ b/pom.xml @@ -29,7 +29,7 @@ org.apache.hudi hudi pom - 0.11.0-SNAPSHOT + 0.12.0-SNAPSHOT Apache Hudi brings stream style processing on big data https://github.com/apache/hudi Hudi @@ -39,16 +39,20 @@ hudi-cli hudi-client hudi-aws + hudi-gcp hudi-hadoop-mr hudi-spark-datasource hudi-timeline-service hudi-utilities hudi-sync packaging/hudi-hadoop-mr-bundle + packaging/hudi-datahub-sync-bundle packaging/hudi-hive-sync-bundle + packaging/hudi-gcp-bundle packaging/hudi-spark-bundle packaging/hudi-presto-bundle packaging/hudi-utilities-bundle + packaging/hudi-utilities-slim-bundle packaging/hudi-timeline-server-bundle packaging/hudi-trino-bundle hudi-examples @@ -91,6 +95,7 @@ 2.7.4 2.10.0 2.0.0 + 2.4.1 2.8.1 5.3.4 2.17 @@ -103,7 +108,7 @@ 2.17.0 1.7.30 2.9.9 - 2.7.3 + 2.10.1 org.apache.hive 2.3.1 core @@ -124,21 +129,27 @@ flink-runtime flink-table-runtime_${scala.binary.version} flink-table-planner_${scala.binary.version} + 1.12.2 + 3.1.3 + 3.2.1 hudi-spark2 hudi-spark2-common 1.8.2 + 2.9.1 2.11.12 2.12.10 ${scala11.version} 2.11 - 0.12 + 0.13 3.3.1 3.0.1 + 3.1.0 file://${project.basedir}/src/test/resources/log4j-surefire.properties 0.12.0 9.4.15.v20190215 3.1.0-incubating - 1.2.3 + 2.4.9 + 3.5.1 1.9.13 1.4.199 3.1.2 @@ -153,8 +164,6 @@ ${project.basedir} provided - compile - org.apache.hudi.spark. 
provided -Xmx2g @@ -170,6 +179,7 @@ 3.17.3 3.11.4 1.1.0 + 3.5.7 8000 http://localhost:${dynamodb-local.port} @@ -302,7 +312,7 @@ true false - release + release,integration-tests deploy @@ -422,6 +432,7 @@ DISCLAIMER **/.* **/*.json + **/*.hfile **/*.log **/*.sqltemplate **/compose_env @@ -434,6 +445,9 @@ **/generated-sources/** .github/** **/*.sql + + **/*.iml + .mvn/** @@ -552,6 +566,13 @@ provided + + + com.github.ben-manes.caffeine + caffeine + ${caffeine.version} + + org.apache.parquet @@ -601,14 +622,6 @@ test - - - org.apache.spark - spark-avro_${scala.binary.version} - ${spark.version} - provided - - org.apache.flink @@ -1191,9 +1204,6 @@ true - - true - @@ -1366,7 +1376,8 @@ integration-tests - integration-tests + deployArtifacts + true @@ -1377,7 +1388,7 @@ true true - false + ${skipTests} @@ -1529,7 +1540,7 @@ https://docs.spring.io/spring-shell/docs/1.2.0.RELEASE https://fasterxml.github.io/jackson-databind/javadoc/2.6 https://hadoop.apache.org/docs/r${hadoop.version}/api - https://hbase.apache.org/1.2/apidocs + https://hbase.apache.org/2.4/apidocs https://hive.apache.org/javadocs/r2.3.6/api https://javadoc.io/static/io.javalin/javalin/2.3.0 https://javadoc.io/doc/org.apache.parquet/parquet-avro/${parquet.version} @@ -1595,6 +1606,7 @@ + spark2 @@ -1610,25 +1622,44 @@ + + + spark2.4 + + hudi-spark-datasource/hudi-spark2 + hudi-spark-datasource/hudi-spark2-common + + + 2.4 + + + + spark2.4 + + + + + spark3 3.2.1 ${spark3.version} - ${spark3.version} + 3 ${scala12.version} 2.12 hudi-spark3 hudi-spark3-common - 3.1.0 - 2.4.1 + ${scalatest.spark3.version} + ${kafka.spark3.version} 1.12.2 1.10.2 1.6.12 ${fasterxml.spark3.version} ${fasterxml.spark3.version} ${fasterxml.spark3.version} - ${fasterxml.spark3.version} + ${fasterxml.spark3.version} + true true @@ -1644,61 +1675,66 @@ - spark3.2.0 + spark3.1 - 3.2.0 + 3.1.3 ${spark3.version} - ${spark3.version} + 3.1 ${scala12.version} 2.12 - hudi-spark3 + hudi-spark3.1.x hudi-spark3-common - 3.1.0 - 2.4.1 + 
${scalatest.spark3.version} + ${kafka.spark3.version} ${fasterxml.spark3.version} ${fasterxml.spark3.version} ${fasterxml.spark3.version} - ${fasterxml.spark3.version} + ${fasterxml.spark3.version} + true true - hudi-spark-datasource/hudi-spark3 + hudi-spark-datasource/hudi-spark3.1.x hudi-spark-datasource/hudi-spark3-common - spark3.2.0 + spark3.1 - spark3.1.x + spark3.2 - 3.1.2 + 3.2.1 ${spark3.version} - ${spark3.version} + 3.2 ${scala12.version} 2.12 - hudi-spark3.1.x + hudi-spark3 hudi-spark3-common - 3.1.0 - 2.4.1 + ${scalatest.spark3.version} + ${kafka.spark3.version} + 1.12.2 + 1.10.2 + 1.6.12 ${fasterxml.spark3.version} ${fasterxml.spark3.version} ${fasterxml.spark3.version} - ${fasterxml.spark3.version} + ${fasterxml.spark3.version} + true true - hudi-spark-datasource/hudi-spark3.1.x + hudi-spark-datasource/hudi-spark3 hudi-spark-datasource/hudi-spark3-common - spark3.1.x + spark3.2 diff --git a/rfc/README.md b/rfc/README.md index 0b3142e8bd6e5..0009a1b72b953 100644 --- a/rfc/README.md +++ b/rfc/README.md @@ -14,60 +14,74 @@ See the License for the specific language governing permissions and limitations under the License. --> + # RFCs - - The RFC process is documented on our [site](https://hudi.apache.org/contribute/rfc-process). Please familiarize yourself with it, before working a new RFC. - - Status can be one of these values: `UNDER REVIEW` (or) `IN PROGRESS` (or) `ABANDONED` (or) `COMPLETED`. + +The RFC process is documented on our [site](https://hudi.apache.org/contribute/rfc-process). Please familiarize yourself +with it, before working a new RFC. + +Status can be one of these values. + +| Status | Meaning | +| -------|-------------------------------------------------------| +| `UNDER REVIEW` | RFC has been proposed and community is actively debating the design/proposal. | +| `IN PROGRESS` | The initial phase of implementation is underway. | +| `ONGOING` | Some or most work has landed; community continues to improve or build follow on phases. 
| +| `ABANDONED` | The proposal was not implemented, due to various reasons. | +| `COMPLETED` | All work is deemed complete. | The list of all RFCs can be found here. > Older RFC content is still [here](https://cwiki.apache.org/confluence/display/HUDI/RFC+Process). -| RFC Number | Title | Status | -| ---| ---| --- | -| 1 | [CSV Source Support for Delta Streamer](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+01+%3A+CSV+Source+Support+for+Delta+Streamer) | `COMPLETED` | -| 2 | [ORC Storage in Hudi](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708439) | `IN PROGRESS` | -| 3 | [Timeline Service with Incremental File System View Syncing](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708965) | `COMPLETED` | -| 4 | [Faster Hive incremental pull queries](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=115513622) | `COMPLETED` | -| 5 | [HUI (Hudi WebUI)](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=130027233) | `ABANDONED` | -| 6 | [Add indexing support to the log file](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+06+%3A+Add+indexing+support+to+the+log+file) | `ABANDONED` | -| 7 | [Point in time Time-Travel queries on Hudi table](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+07+%3A+Point+in+time+Time-Travel+queries+on+Hudi+table) | `COMPLETED` | -| 8 | [Record level indexing mechanisms for Hudi datasets](https://cwiki.apache.org/confluence/display/HUDI/RFC-08++Record+level+indexing+mechanisms+for+Hudi+datasets) | `UNDER REVIEW` | -| 9 | [Hudi Dataset Snapshot Exporter](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+09+%3A+Hudi+Dataset+Snapshot+Exporter) | `COMPLETED` | -| 10 | [Restructuring and auto-generation of docs](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+10+%3A+Restructuring+and+auto-generation+of+docs) | `COMPLETED` | -| 11 | [Refactor of the configuration framework of hudi 
project](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+11+%3A+Refactor+of+the+configuration+framework+of+hudi+project) | `ABANDONED` | -| 12 | [Efficient Migration of Large Parquet Tables to Apache Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+12+%3A+Efficient+Migration+of+Large+Parquet+Tables+to+Apache+Hudi) | `COMPLETED` | -| 13 | [Integrate Hudi with Flink](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=141724520) | `COMPLETED` | -| 14 | [JDBC incremental puller](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+14+%3A+JDBC+incremental+puller) | `COMPLETED` | -| 15 | [HUDI File Listing Improvements](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+15%3A+HUDI+File+Listing+Improvements) | `COMPLETED` | -| 16 | [Abstraction for HoodieInputFormat and RecordReader](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+16+Abstraction+for+HoodieInputFormat+and+RecordReader) | `COMPLETED` | -| 17 | [Abstract common meta sync module support multiple meta service](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+17+Abstract+common+meta+sync+module+support+multiple+meta+service) | `COMPLETED` | -| 18 | [Insert Overwrite API](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+18+Insert+Overwrite+API) | `COMPLETED` | -| 19 | [Clustering data for freshness and query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+19+Clustering+data+for+freshness+and+query+performance) | `COMPLETED` | -| 20 | [handle failed records](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+20+%3A+handle+failed+records) | `IN PROGRESS` | -| 21 | [Allow HoodieRecordKey to be Virtual](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+21+%3A+Allow+HoodieRecordKey+to+be+Virtual) | `COMPLETED` | +| RFC Number | Title | Status | +| 
---|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --- | +| 1 | [CSV Source Support for Delta Streamer](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+01+%3A+CSV+Source+Support+for+Delta+Streamer) | `COMPLETED` | +| 2 | [ORC Storage in Hudi](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708439) | `ONGOING` | +| 3 | [Timeline Service with Incremental File System View Syncing](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708965) | `COMPLETED` | +| 4 | [Faster Hive incremental pull queries](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=115513622) | `COMPLETED` | +| 5 | [HUI (Hudi WebUI)](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=130027233) | `ABANDONED` | +| 6 | [Add indexing support to the log file](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+06+%3A+Add+indexing+support+to+the+log+file) | `ABANDONED` | +| 7 | [Point in time Time-Travel queries on Hudi table](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+07+%3A+Point+in+time+Time-Travel+queries+on+Hudi+table) | `COMPLETED` | +| 8 | [Record level indexing mechanisms for Hudi datasets](https://cwiki.apache.org/confluence/display/HUDI/RFC-08++Record+level+indexing+mechanisms+for+Hudi+datasets) | `ONGOING` | +| 9 | [Hudi Dataset Snapshot Exporter](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+09+%3A+Hudi+Dataset+Snapshot+Exporter) | `COMPLETED` | +| 10 | [Restructuring and auto-generation of docs](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+10+%3A+Restructuring+and+auto-generation+of+docs) | `COMPLETED` | +| 11 | [Refactor of the configuration framework of hudi project](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+11+%3A+Refactor+of+the+configuration+framework+of+hudi+project) | `ABANDONED` | +| 12 | [Efficient 
Migration of Large Parquet Tables to Apache Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+12+%3A+Efficient+Migration+of+Large+Parquet+Tables+to+Apache+Hudi) | `COMPLETED` | +| 13 | [Integrate Hudi with Flink](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=141724520) | `COMPLETED` | +| 14 | [JDBC incremental puller](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+14+%3A+JDBC+incremental+puller) | `COMPLETED` | +| 15 | [HUDI File Listing Improvements](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+15%3A+HUDI+File+Listing+Improvements) | `COMPLETED` | +| 16 | [Abstraction for HoodieInputFormat and RecordReader](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+16+Abstraction+for+HoodieInputFormat+and+RecordReader) | `COMPLETED` | +| 17 | [Abstract common meta sync module support multiple meta service](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+17+Abstract+common+meta+sync+module+support+multiple+meta+service) | `COMPLETED` | +| 18 | [Insert Overwrite API](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+18+Insert+Overwrite+API) | `COMPLETED` | +| 19 | [Clustering data for freshness and query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+19+Clustering+data+for+freshness+and+query+performance) | `COMPLETED` | +| 20 | [handle failed records](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+20+%3A+handle+failed+records) | `IN PROGRESS` | +| 21 | [Allow HoodieRecordKey to be Virtual](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+21+%3A+Allow+HoodieRecordKey+to+be+Virtual) | `COMPLETED` | | 22 | [Snapshot Isolation using Optimistic Concurrency Control for multi-writers](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+22+%3A+Snapshot+Isolation+using+Optimistic+Concurrency+Control+for+multi-writers) | `COMPLETED` | -| 23 | [Hudi Observability metrics collection](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+23+%3A+Hudi+Observability+metrics+collection) | 
`UNDER REVIEW` | -| 24 | [Hoodie Flink Writer Proposal](https://cwiki.apache.org/confluence/display/HUDI/RFC-24%3A+Hoodie+Flink+Writer+Proposal) | `UNDER REVIEW` | -| 25 | [Spark SQL Extension For Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+25%3A+Spark+SQL+Extension+For+Hudi) | `COMPLETED` | -| 26 | [Optimization For Hudi Table Query](https://cwiki.apache.org/confluence/display/HUDI/RFC-26+Optimization+For+Hudi+Table+Query) | `IN PROGRESS` | -| 27 | [Data skipping index to improve query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC-27+Data+skipping+index+to+improve+query+performance) | `IN PROGRESS` | -| 28 | [Support Z-order curve](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=181307144) | `IN PROGRESS` | -| 29 | [Hash Index](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+29%3A+Hash+Index) | `IN PROGRESS` | -| 30 | [Batch operation](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+30%3A+Batch+operation) | `UNDER REVIEW` | -| 31 | [Hive integration Improvment](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+31%3A+Hive+integration+Improvment) | `UNDER REVIEW` | -| 32 | [Kafka Connect Sink for Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC-32+Kafka+Connect+Sink+for+Hudi) | `IN PROGRESS` | -| 33 | [Hudi supports more comprehensive Schema Evolution](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+33++Hudi+supports+more+comprehensive+Schema+Evolution) | `IN PROGRESS` | -| 34 | [Hudi BigQuery Integration (WIP)](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=188745980) | `UNDER REVIEW` | -| 35 | [Make Flink MOR table writing streaming friendly](https://cwiki.apache.org/confluence/display/HUDI/RFC-35%3A+Make+Flink+MOR+table+writing+streaming+friendly) | `UNDER REVIEW` | -| 36 | [HUDI Metastore Server](https://cwiki.apache.org/confluence/display/HUDI/%5BWIP%5D+RFC-36%3A+HUDI+Metastore+Server) | `UNDER REVIEW` | -| 37 | [Hudi Metadata based Bloom 
Index](rfc-37/rfc-37.md) | `IN PROGRESS` | -| 38 | [Spark Datasource V2 Integration](./rfc-38/rfc-38.md) | `IN PROGRESS` | -| 39 | [Incremental source for Debezium](./rfc-39/rfc-39.md) | `IN PROGRESS` | -| 40 | [Hudi Connector for Trino](./rfc-40/rfc-40.md) | `IN PROGRESS` | -| 41 | [Hudi Snowflake Integration] | `UNDER REVIEW` | -| 42 | [Consistent Hashing Index](./rfc-42/rfc-42.md) | `UNDER REVIEW` | -| 43 | [Compaction / Clustering Service](./rfc-43/rfc-43.md) | `UNDER REVIEW` | -| 44 | [Hudi Connector for Presto](./rfc-44/rfc-44.md) | `UNDER REVIEW` | -| 45 | [Asynchronous Metadata Indexing](./rfc-45/rfc-45.md) | `UNDER REVIEW` | -| 46 | [Optimizing Record Payload Handling](./rfc-46/rfc-46.md) | `UNDER REVIEW` | -| 47 | [Add Call Produce Command for Spark SQL](./rfc-47/rfc-47.md) | `UNDER REVIEW` | +| 23 | [Hudi Observability metrics collection](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+23+%3A+Hudi+Observability+metrics+collection)| `ABANDONED` | +| 24 | [Hoodie Flink Writer Proposal](https://cwiki.apache.org/confluence/display/HUDI/RFC-24%3A+Hoodie+Flink+Writer+Proposal) | `COMPLETED` | +| 25 | [Spark SQL Extension For Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+25%3A+Spark+SQL+Extension+For+Hudi) | `COMPLETED` | +| 26 | [Optimization For Hudi Table Query](https://cwiki.apache.org/confluence/display/HUDI/RFC-26+Optimization+For+Hudi+Table+Query) | `ONGOING` | +| 27 | [Data skipping index to improve query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC-27+Data+skipping+index+to+improve+query+performance) | `ONGOING` | +| 28 | [Support Z-order curve](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=181307144) | `COMPLETED` | +| 29 | [Hash Index](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+29%3A+Hash+Index) | `ONGOING` | +| 30 | [Batch operation](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+30%3A+Batch+operation) | `UNDER REVIEW` | +| 31 | [Hive integration 
Improvement](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+31%3A+Hive+integration+Improvment)| `UNDER REVIEW` | +| 32 | [Kafka Connect Sink for Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC-32+Kafka+Connect+Sink+for+Hudi)| `ONGOING` | +| 33 | [Hudi supports more comprehensive Schema Evolution](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+33++Hudi+supports+more+comprehensive+Schema+Evolution)| `ONGOING` | +| 34 | [Hudi BigQuery Integration](./rfc-34/rfc-34.md) | `COMPLETED` | +| 35 | [Make Flink MOR table writing streaming friendly](https://cwiki.apache.org/confluence/display/HUDI/RFC-35%3A+Make+Flink+MOR+table+writing+streaming+friendly)| `UNDER REVIEW` | +| 36 | [HUDI Metastore Server](https://cwiki.apache.org/confluence/display/HUDI/%5BWIP%5D+RFC-36%3A+HUDI+Metastore+Server)| `UNDER REVIEW` | +| 37 | [Hudi Metadata based Bloom Index](rfc-37/rfc-37.md) | `IN PROGRESS` | +| 38 | [Spark Datasource V2 Integration](./rfc-38/rfc-38.md) | `IN PROGRESS` | +| 39 | [Incremental source for Debezium](./rfc-39/rfc-39.md) | `ONGOING` | +| 40 | [Hudi Connector for Trino](./rfc-40/rfc-40.md) | `ONGOING` | +| 41 | [Hudi Snowflake Integration] | `UNDER REVIEW`| +| 42 | [Consistent Hashing Index](./rfc-42/rfc-42.md) | `IN PROGRESS` | +| 43 | [Compaction / Clustering Service](./rfc-43/rfc-43.md) | `UNDER REVIEW` | +| 44 | [Hudi Connector for Presto](./rfc-44/rfc-44.md) | `UNDER REVIEW` | +| 45 | [Asynchronous Metadata Indexing](./rfc-45/rfc-45.md) | `ONGOING` | +| 46 | [Optimizing Record Payload Handling](./rfc-46/rfc-46.md) | `UNDER REVIEW` | +| 47 | [Add Call Produce Command for Spark SQL](./rfc-47/rfc-47.md) | `UNDER REVIEW` | +| 48 | [LogCompaction for MOR tables](./rfc-48/rfc-48.md) | `UNDER REVIEW` | +| 49 | [Support sync with DataHub](./rfc-49/rfc-49.md) | `ONGOING` | diff --git a/rfc/rfc-34/big-query-arch.png b/rfc/rfc-34/big-query-arch.png new file mode 100644 index 0000000000000..4503717fd6998 Binary files /dev/null and 
b/rfc/rfc-34/big-query-arch.png differ diff --git a/rfc/rfc-34/rfc-34.md b/rfc/rfc-34/rfc-34.md new file mode 100644 index 0000000000000..bb32b07cef17e --- /dev/null +++ b/rfc/rfc-34/rfc-34.md @@ -0,0 +1,182 @@ + +# Hudi BigQuery Integration + +## Abstract + +BigQuery is Google Cloud's fully managed, petabyte-scale, and cost-effective analytics data warehouse that lets you run +analytics over vast amounts of data in near real time. BigQuery +currently [doesn’t support](https://cloud.google.com/bigquery/external-data-cloud-storage) Apache Hudi, but it has +support for the Parquet and other formats. The proposal is to implement a BigQuerySync similar to HiveSync to sync the +Hudi table as the BigQuery External Parquet table, so that users can query the Hudi tables using BigQuery. Uber is +already syncing some of its Hudi tables to BigQuery data mart this will help them to write, sync and query. + +## Background + +Hudi table types define how data is indexed & laid out on the DFS and how the above primitives and timeline activities +are implemented on top of such organization (i.e how data is written). In turn, query types define how the underlying +data is exposed to the queries (i.e how data is read). + +Hudi supports the following table types: + +* [Copy On Write](https://hudi.apache.org/docs/table_types#copy-on-write-table): Stores data using exclusively columnar + file formats (e.g parquet). Updates simply version & rewrite the files by performing a synchronous merge during write. +* [Merge On Read](https://hudi.apache.org/docs/table_types#merge-on-read-table): Stores data using a combination of + columnar (e.g parquet) + row based (e.g avro) file formats. Updates are logged to delta files & later compacted to + produce new versions of columnar files synchronously or asynchronously. 
+ +Hudi maintains multiple versions of the Parquet files and tracks the latest version using Hudi metadata (Cow), since +BigQuery doesn’t support Hudi yet, when you sync the Hudi’s parquet files to BigQuery and query it without Hudi’s +metadata layer, it will query all the versions of the parquet files which might cause duplicate rows. + +To avoid the above scenario, this proposal is to implement a BigQuery sync tool which will use the Hudi metadata to know +which files are latest and filter only the latest version of parquet files to BigQuery external table so that users can +query the Hudi tables without any duplicate records. + +## Implementation + +This new feature will implement +the [AbstractSyncTool](https://github.com/apache/hudi/blob/master/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncTool.java) +similar to +the [HiveSyncTool](https://github.com/apache/hudi/blob/master/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java) +named BigQuerySyncTool with sync methods for CoW tables. The sync implementation will identify the latest parquet files +for each .commit file and keep these manifests synced with the BigQuery manifest table. Spark datasource & DeltaStreamer +can already take a list of such classes to keep these manifests synced. + +### + +![alt_text](big-query-arch.png "Big Query integration architecture.") + +To avoid duplicate records on the Hudi CoW table, we need to generate the list of latest snapshot files and create a BQ +table for it, then use that table to filter the duplicate records from the history table. + +### Steps to create Hudi table on BigQuery + +1. Let's say you have a Hudi table data on google cloud storage (GCS). 
+ + ``` +CREATE TABLE dwh.bq_demo_partitioned_cow ( + id bigint, + name string, + price double, + ts bigint, + dt string +) +using hudi +partitioned by (dt) +options ( + type = 'cow', + primaryKey = 'id', + preCombineField = 'ts', + hoodie.datasource.write.drop.partition.columns = 'true' +) +location 'gs://hudi_datasets/bq_demo_partitioned_cow/'; +``` + +BigQuery doesn't accept the partition column in the parquet schema, hence we need to drop the partition columns from the +schema by enabling this flag: + +``` +hoodie.datasource.write.drop.partition.columns = 'true' +``` + +2. As part of the BigQuerySync, the sync tool will generate/update the manifest files inside the .hoodie metadata files. + For tables which already exist, you can generate a manifest file for the Hudi table which has the list of the latest + snapshot parquet file names in a CSV format with only one column the file name. The location of the manifest file + will be on the .hoodie metadata folder (`gs://bucket_name/table_name/.hoodie/manifest/latest_snapshot_files.csv`) + +``` +// this command is coming soon. +// the alternative for this command could be a JAVA API to generate the manifest. +GENERATE symlink_format_manifest FOR TABLE dwh.bq_demo_partitioned_cow; +``` + +3. Create a BQ table named `hudi_table_name_manifest` with only one column filename with this location gs: + //bucket_name/table_name/.hoodie/manifest/latest_snapshot_files.csv. + +``` +CREATE EXTERNAL TABLE `my-first-project.dwh.bq_demo_partitioned_cow_manifest` +( + filename STRING +) +OPTIONS( + format="CSV", + uris=["gs://hudi_datasets/bq_demo_partitioned_cow/.hoodie/manifest/latest_snapshot_files.csv"] +); +``` + +4. Create another BQ table named `hudi_table_name_history` with this location `gs://bucket_name/table_name`, don't use + this table to query the data, this table will have duplicate records since it scans all the versions of parquet files + in the table/partition folders. 
+ +``` +CREATE EXTERNAL TABLE `my-first-project.dwh.bq_demo_partitioned_cow_history` +WITH + PARTITION COLUMNS + OPTIONS( + ignore_unknown_values=true, + format="PARQUET", + hive_partition_uri_prefix="gs://hudi_datasets/bq_demo_partitioned_cow/", + uris=["gs://hudi_snowflake/bq_demo_partitioned_cow/dt=*"] + ); +``` + +5. Create a BQ view with the same hudi table name with this query, this view you created has the data from the Hudi + table without any duplicates, you can use that table to query the data. + +``` +CREATE VIEW `my-first-project.dwh.bq_demo_partitioned_cow` AS + SELECT + * + FROM + `my-first-project.dwh.bq_demo_partitioned_cow_history` + WHERE + _hoodie_file_name IN ( + SELECT + filename + FROM + `my-first-project.dwh.bq_demo_partitioned_cow_manifest` + ); +``` + +BigQuerySync tool will +use [HoodieTableMetaClient](https://github.com/apache/hudi/blob/master/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java) +methods to get the list of latest set of parquet data files to generate the manifest csv file, then will invoke +the [BigQuery Java Client](https://github.com/googleapis/java-bigquery/blob/main/samples/snippets/src/main/java/com/example/bigquery/CreateTableExternalHivePartitioned.java) +to create the manifest table, history table and hudi table views. + +**All the steps described here will be automated, all you have to do is to supply a bunch of configs to enable the +BigQuery sync.** + +## Rollout/Adoption Plan + +There are no impacts to existing users since this is entirely a new feature to support a new use case hence there are no +migrations/behavior changes required. + +After the BigQuery sync tool has been implemented, I will reach out to Uber's Hudi/BigQuery team to rollout this feature +for their BigQuery ingestion service. 
+ +## Test Plan + +This RFC aims to implement a new SyncTool to sync the Hudi table to BigQuery, to test this feature, there will be some +test tables created and updated on to the BigQuery along with unit tests for the code. Since this is an entirely new +feature, I am confident that this will not cause any regressions during and after roll out. + +## Future Plans + +After this feature has been rolled out, the same model can be applied to sync the Hudi tables to other external data +warehouses like Snowflake. diff --git a/rfc/rfc-45/async_metadata_index.png b/rfc/rfc-45/async_metadata_index.png new file mode 100644 index 0000000000000..cc044d6c8f3fa Binary files /dev/null and b/rfc/rfc-45/async_metadata_index.png differ diff --git a/rfc/rfc-45/rfc-45.md b/rfc/rfc-45/rfc-45.md new file mode 100644 index 0000000000000..f79dd896a09e6 --- /dev/null +++ b/rfc/rfc-45/rfc-45.md @@ -0,0 +1,376 @@ + + +# RFC-45: Asynchronous Metadata Indexing + +## Proposers + +- @codope +- @manojpec + +## Approvers + +- @nsivabalan +- @vinothchandar + +## Status + +JIRA: [HUDI-2488](https://issues.apache.org/jira/browse/HUDI-2488) + +## Abstract + +Metadata indexing (aka metadata bootstrapping) is the process of creation of one +or more metadata-based indexes, e.g. data partitions to files index, that is +stored in Hudi metadata table. Currently, the metadata table (referred as MDT +hereafter) supports single partition which is created synchronously with the +corresponding data table, i.e. commits are first applied to metadata table +followed by data table. Our goal for MDT is to support multiple partitions to +boost the performance of existing index and records lookup. However, the +synchronous manner of metadata indexing is not very scalable as we add more +partitions to the MDT because the regular writers (writing to the data table) +have to wait until the MDT commit completes. In this RFC, we propose a design to +support asynchronous metadata indexing. 
+ +## Background + +We can read more about the MDT design +in [RFC-15](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+15%3A+HUDI+File+Listing+Improvements) +. Here is a quick summary of the current state (Hudi v0.10.1). MDT is an +internal Merge-on-Read (MOR) table that has a single partition called `files` +which stores the data partitions to files index that is used in file listing. +MDT is co-located with the data table (inside `.hoodie/metadata` directory under +the basepath). In order to handle multi-writer scenario, users configure lock +provider and only one writer can access MDT in read-write mode. Hence, any write +to MDT is guarded by the data table lock. This ensures only one write is +committed to MDT at any point in time and thus guarantees serializability. +However, locking overhead adversely affects the write throughput and will reach +its scalability limits as we add more partitions to the MDT. + +## Goals + +- Support indexing one or more partitions in MDT while regular writers and table + services (such as cleaning or compaction) are in progress. +- Locking to be as lightweight as possible. +- Keep required config changes to a minimum to simplify deployment / upgrade in + production. +- Do not require specific ordering of how writers and table service pipelines + need to be upgraded / restarted. +- If an external long-running process is being used to initialize the index, the + process should be made idempotent so it can handle errors from previous runs. +- To re-initialize the index, make it as simple as running the external + initialization process again without having to change configs. + +## Implementation + +### High Level Design + +#### A new Hudi action: INDEXING + +We introduce a new action `index` which will denote the index building process, +the mechanics of which is as follows: + +1. From an external process, users can issue a CREATE INDEX or run a job to + trigger indexing for an existing table. + 1. 
This will schedule INDEXING action and add + a `.index.requested` to the timeline, which contains the + indexing plan. Index scheduling will also initialize the filegroup for + the partitions for which indexing is planned. The creation of filegroups + will be done within a lock. + 2. From here on, the index building process will continue to build an index + up to instant time `t`, where `t` is the latest completed instant time on + the timeline without any + "holes" i.e. no pending async operations prior to it. + 3. The indexing process will write these out as base files within the + corresponding metadata partition. A metadata partition cannot be used if + there is any pending indexing action against it. As and when indexing is + completed for a partition, then table config (`hoodie.properties`) will + be updated to indicate that partition is available for reads or + synchronous updates. Hudi table config will be the source of truth for + the current state of metadata index. + +2. Any inflight writers (i.e. with instant time `t'` > `t`) will check for any + new indexing request on the timeline prior to preparing to commit. + 1. Such writers will proceed to additionally add log entries corresponding + to each such indexing request into the metadata partition. + 2. There is always a TOCTOU issue here, where the inflight writer may not + see an indexing request that was just added and proceed to commit without + that. We will correct this during indexing action completion. In the + average case, this may not happen and the design has liveness. + +3. When the indexing process is about to complete (i.e. indexing upto + instant `t` is done but before completing indexing commit), it will check for + all completed commit instants after `t` to ensure each of them added entries + per its indexing plan, otherwise simply abort after a configurable timeout. + Let's call this the **indexing catchup**. 
So, the indexer will not only write + base files but also ensure that log entries due to instants after `t` are in + the same filegroup i.e. no new filegroup is initialized by writers while + indexing is in progress. + 1. The corner case here would be that the indexing catchup does not factor + in the inflight writer just about to commit. But given indexing would + take some finite amount of time to go from requested to completion (or we + can add some, configurable artificial delays here say 60 seconds), an + inflight writer, that is just about to commit concurrently, has a very + high chance of seeing the indexing plan and aborting itself. + +We can just introduce a lock for adding events to the timeline and these races +would vanish completely, still providing great scalability and asynchrony for +these processes. The indexer will error out if there is no lock provider +configured. + +#### Multi-writer scenario + +![](./async_metadata_index.png) + +Let us walk through a concrete multi-writer scenario to understand the above +indexing mechanism. In this scenario, let instant `t0` be the last completed +instant on the timeline. Suppose user triggered index building from an external +process at `t3`. This will create `t3.index.requested` file with the indexing +plan. The plan contains the metadata partitions that need to be created and the +last completed instant, e.g. + +``` +[ + {MetadataPartitionType.FILES.partitionPath(), t0}, + {MetadataPartitionType.BLOOM_FILTER.partitionPath(), t0}, + {MetadataPartitionType.COLUMN_STATS.partitionPath(), t0} +] +``` + +Further, suppose there were two inflight writers Writer1 and Writer2 (with +inflight instants `t1` and `t2` respectively) while the indexing was requested +or inflight. In this case, the writers will check for pending index action and +find a pending instant `t3`.
Now, if the metadata index creation is pending, +which means indexer has already initialized a filegroup, then each writer will +create log files in the same filegroup for the metadata index update. This will +happen within the existing data table lock. + +The indexer runs in a loop until the metadata for data up to `t0` plus the data +written due to `t1` and `t2` has been indexed, or the indexing timed out. +Whether indexing timed out or not, table config would be updated with any MDT +partition(s) for which indexing was complete till `t2`. In case of timeout +indexer will abort. At this point, user can trigger the index process again, +however, this time indexer will check for available partitions in table config +and skip those partitions. This design ensures that the regular writers do not +fail due to indexing. + +### Low Level Design + +#### Schedule Indexing + +The scheduling initializes the file groups for metadata partitions in a lock. It +does not update any table config. + +``` +1 Run pre-scheduling validation (valid index requested, lock provider configured, idempotent checks) +2 Begin transaction + 2.a Get the base instant + 2.b Start initializing file groups for each partition + 2.c Create index plan and save indexing.requested instant to the timeline +3 End transaction +``` + +If there is failure in any of the above steps, then we abort gracefully i.e. +delete the metadata partition if it was initialized. + +#### Run Indexing + +This is a separate executor, which reads the plan and builds the index.
+ +``` +1 Run pre-indexing checks (lock provider configured, indexing.requested exists, idempotent checks) +2 Read the indexing plan and if any of the requested partition is inflight or already completed then error out and return early +3 Transition indexing.requested to inflight +4 Build metadata partitions + 4.a Build the base file in the metadata partition to index upto instant as per the plan + 4.b Update inflight partitions config in hoodie.properties +5 Determine the catchup start instant based on write and non-write timeline +6 Start indexing catchup in a separate thread (that can be interrupted upon timeout) + 6.a For each instant to catchup + 6.a.i if instant is completed and has corresponding deltacommit in metadata timeline then continue + 6.a.ii if instant is inflight, then reload active timeline periodically until completed or timed out + 6.a.iii update metadata table, if needed, within a lock +7 Build indexing commit metadata with the partition info and caught upto instant +8 Begin transaction + 8.a update completed metadata partitions in table config + 8.b save indexing commit metadata to the timeline transition indexing.inflight to completed. +9 End transaction +``` + +If there is failure in any of the above steps, then we abort gracefully i.e. +delete the metadata partition if it exists and revert the table config updates. 
+ +#### Configs + +``` +# enable metadata +hoodie.metadata.enable=true +# enable asynchronous metadata indexing +hoodie.metadata.index.async=true +# enable column stats index +hoodie.metadata.index.column.stats.enable=true +# set indexing catchup timeout +hoodie.metadata.index.check.timeout.seconds=60 +# set OCC concurrency mode +hoodie.write.concurrency.mode=optimistic_concurrency_control +# set lock provider +hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider +``` + +#### Table upgrade/downgrade + +While upgrading from a previous version to the current version, if metadata is +enabled and `files` partition exists then completed partitions in +hoodie.properties will be updated to `files` partition. While downgrading to a +previous version, if metadata table exists then it is deleted because metadata +table in current version has a schema that is not forward compatible. + +### Error Handling + +**Case 1: Writer fails while indexer is inflight** + +This means index update due to writer did not complete. Indexer continues to +build the index ignoring the failed instant due to writer. The next update by +the writer will trigger a rollback of the failed instant, which will also +rollback incomplete updates in metadata table. + +**Case 2: Indexer fails while writer is inflight** + +Writer will commit adding log entries to the metadata partition. However, table +config will indicate that partition is not ready to use. When indexer is +re-triggered, it will check the plan and table config to figure out which MDT +partitions to index and start indexing for those partitions. + +**Case 3: Race conditions** + +a) Writer went inflight just after an indexing request was added but indexer has +not yet started executing. + +In this case, writer will continue to log updates in metadata partition. At the +time of execution, indexer will see there are already some log files and ensure +that the indexing catchup passes.
+ +b) Inflight writer about to commit, but indexing completed just before that. + +Ideally, the indexing catchup in the indexer should have failed. But this could +happen in the following sequence of events: + +1. No pending data commit. Indexing check passed, indexing commit not + completed (table config yet to be updated). +2. Writer went inflight knowing that MDT partition is not ready for use. +3. Indexing commit done, table config updated. + +In this case, the writer will continue to write log files under the latest base +filegroup in the MDT partition. Even though the indexer missed the updates due +to writer, there is no "index loss" as such i.e. metadata due to writer is still +updated in the MDT partition. Async compaction on the MDT will eventually merge +the updates into another base file. + +Or, we can introduce a lock for adding events to the metadata timeline. + +c) Inflight writer about to commit but index is still being scheduled + +Consider the following scenario: + +1. Writer is in inflight mode. +2. Indexer is starting and creating the file-groups. Suppose there are 100 + file-groups to be created. +3. Writer just finished and tries to write log blocks - it only sees a subset of + file-groups created yet (as step 2 above has not completed yet). + This will cause writer to incorrectly write updates to a lesser number of + shards. + +In this case, we ensure that scheduling for metadata index always happens within +a lock. Since the initialization of filegroups happen at the time of scheduling, +indexer will hold the lock until all the filegroups are created. + +**Case 4: Async table services** + +The metadata partition cannot be used if there is any pending index action +against it. So, async compaction/cleaning/clustering will ignore the metadata +partition for which indexing is inflight. + +**Case 5: Data timeline with holes** + +Let's say the data timeline when indexer is started looks +like: `C1, C2,....
C5 (inflight), C6, C7, C8`, where `C1` is a commit at +instant `1`. In this case the latest completed instant without any hole is `C4`. +So, indexer will continue to index up to `C4`. Instants `C5-C8` will go through +the indexing catchup. If `C5` does not complete before the timeout, then indexer +will abort. The indexer will run through the same process again when +re-triggered. + +The above example contained only write commits however the indexer will consider +non-write commits (such as clean/restore/rollback) as well. Let's take such an +example: + +| DC | DC | DC | CLEAN | DC | DC | COMPACT | DC | INDEXING | DC | +| ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | +| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | +| C | C | C | I | C | C | R | C | R | I | + +Here, DC indicates a deltacommit, second row is the instant time, and the last +row is whether the action is completed (C), inflight (I) or requested (R). In +this case, the base instant up to which there are no holes in write timeline +is `DC6`. The indexer will also check the earliest pending instant in non-write +timeline before this base instant, which is `CLEAN4`. While the indexing is done +up to base instant, the remaining instants (CLEAN4, COMPACT7, DC8) are checked +during indexing catchup whether they logged updates to the corresponding filegroup +as per the index plan. Note that during catchup, indexer won't move beyond +unless the instants to catch up actually get into completed state. For instance, +if the CLEAN4 was inflight till the configured timeout, then indexer will abort. + +## Summary of key proposals + +- New INDEXING action on data timeline. +- Async indexer to handle state change for the new action. +- Concept of "indexing catchup" to reconcile instants that went inflight after + indexer started. +- Table config to be the source of truth for inflight and completed MDT + partitions. +- Indexer will error out if lock provider not configured.
+ +## Rollout/Adoption Plan + +- What impact (if any) will there be on existing users? + +There can be two kinds of existing users: + +a) Enabling metadata for the first time: There should not be any impact on such +users. When they enable metadata, they can trigger indexing process. b) Metadata +already enabled: Such users already have metadata table with at least one +partition. If they trigger indexing process, then the indexer should take into +account the existing metadata and ignore instants up to which MDT is in sync with +the data table. + +- If we are changing behavior how will we phase out the older behavior? + +The changes will be backward-compatible and if the async indexing is disabled +then the existing behavior of MDT creation and updates will be used. + +- If we need special migration tools, describe them here. + +Not required. + +- When will we remove the existing behavior? + +Not required. + +## Test Plan + +- Extensive unit tests to cover all scenarios including conflicts and + error-handling. +- Run a long-running test on EMR cluster with async indexing enabled.
diff --git a/rfc/rfc-49/DatasetProfile.png b/rfc/rfc-49/DatasetProfile.png new file mode 100644 index 0000000000000..0f9fde18753f8 Binary files /dev/null and b/rfc/rfc-49/DatasetProfile.png differ diff --git a/rfc/rfc-49/DatasetProperties.png b/rfc/rfc-49/DatasetProperties.png new file mode 100644 index 0000000000000..f3e584d7d74cf Binary files /dev/null and b/rfc/rfc-49/DatasetProperties.png differ diff --git a/rfc/rfc-49/SchemaMetadata.png b/rfc/rfc-49/SchemaMetadata.png new file mode 100644 index 0000000000000..8af6532581ef2 Binary files /dev/null and b/rfc/rfc-49/SchemaMetadata.png differ diff --git a/rfc/rfc-49/rfc-49.md b/rfc/rfc-49/rfc-49.md new file mode 100644 index 0000000000000..675ccf444d57b --- /dev/null +++ b/rfc/rfc-49/rfc-49.md @@ -0,0 +1,90 @@ + + +# RFC-49: Support sync with DataHub + + +## Proposers +- @xushiyan + +## Approvers + - @vinothchandar + - @Sivabalan + +## Status + +JIRA: [HUDI-3468](https://issues.apache.org/jira/browse/HUDI-3468) + +## Overview + +Support sync with [DataHub](https://datahubproject.io/) to provide rich metadata capabilities for Hudi tables. + +> DataHub is an open-source metadata platform for the modern data stack. + +Read more in https://datahubproject.io/docs/#introduction + +## Implementation + +To sync with DataHub, we can make use of existing `hudi-sync` abstraction by extending `org.apache.hudi.sync.common.AbstractSyncTool`. + +The sync mechanism can be implemented via [Java Emitter](https://datahubproject.io/docs/metadata-integration/java/as-a-library). The main work is about + +- take in user's configurations to connect to an existing DataHub instance +- compose desired metadata for sync based on [DataHub's metadata model](https://datahubproject.io/docs/metadata-modeling/metadata-model) + +## Configurations + +Necessary configurations will be added using pattern `hoodie.sync.datahub.*` to connect to the user-operated DataHub instance. 
+ +## Metadata Model + +A Hudi table maps to a [Dataset entity](https://datahubproject.io/docs/generated/metamodel/entities/dataset) in DataHub. + +#### Identifier + +A Dataset can be identified by urn consists of [Data Platform](https://datahubproject.io/docs/generated/metamodel/entities/dataplatform) (default `hudi`), table identifier (`.`), and optional/configurable environment suffix. An example: + +```text +urn:li:dataset:(urn:li:dataPlatform:hudi,mydb,mytable,prod) +``` + +#### Schema + +Schema can be sync'ed via the `SchemaMetadata` aspect. `platformSchema` (raw schema) will be sync'ed using the avro schema string persisted in the commit metadata. + +![](SchemaMetadata.png) + +#### Dataset Properties + +Key-value table properties, e.g., last sync'ed commit timestamp, can be sync'ed via the `DatasetProperties` aspect. + +![](DatasetProperties.png) + +#### Column Stats + +Column stats, e.g., min/max value of selected fields, can be retrieved from Hudi metadata table's column stats partition, and sync'ed via the `fieldProfiles` of `DatasetProfile` aspect. + +![](DatasetProfile.png) + +## Rollout/Adoption Plan + +This is a new feature to be enabled by configuration. Users can choose to turn on or off at any time. This feature won't interfere with existing Hudi tables' operations. + +## Test Plan + +- Unit tests +- Run a PoC setup with DataHub integration to verify the desired metadata are sync'ed diff --git a/scripts/release/create_source_release.sh b/scripts/release/create_source_release.sh index ff54706be237c..bd37c80b1e2cd 100755 --- a/scripts/release/create_source_release.sh +++ b/scripts/release/create_source_release.sh @@ -72,6 +72,7 @@ rsync -a \ --exclude ".idea" --exclude "*.iml" --exclude ".DS_Store" --exclude "build-target" \ --exclude "docs/content" --exclude ".rubydeps" \ --exclude "rfc" \ + --exclude "docker/images" \ . 
hudi-$RELEASE_VERSION tar czf ${RELEASE_DIR}/hudi-${RELEASE_VERSION}.src.tgz hudi-$RELEASE_VERSION diff --git a/scripts/release/deploy_staging_jars.sh b/scripts/release/deploy_staging_jars.sh index 4bd9158bccae3..76f018e2025e9 100755 --- a/scripts/release/deploy_staging_jars.sh +++ b/scripts/release/deploy_staging_jars.sh @@ -21,40 +21,58 @@ ## Variables with defaults (if not overwritten by environment) ## MVN=${MVN:-mvn} -SPARK_VERSION=2 # fail immediately set -o errexit set -o nounset -# print command before executing -set -o xtrace -CURR_DIR=`pwd` -if [[ `basename $CURR_DIR` != "scripts" ]] ; then - echo "You have to call the script from the scripts/ dir" +CURR_DIR=$(pwd) +if [ ! -d "$CURR_DIR/packaging" ] ; then + echo "You have to call the script from the repository root dir that contains 'packaging/'" exit 1 fi -if [[ $# -lt 1 ]]; then - echo "This script will deploy artifacts to staging repositories" - echo "There is one param required:" - echo "--scala_version=\${SCALA_VERSION}" - exit -else - for param in "$@" - do - if [[ $param =~ --scala_version\=(2\.1[1-2]) ]]; then - SCALA_VERSION=${BASH_REMATCH[1]} - elif [[ $param =~ --spark_version\=([2-3]) ]]; then - SPARK_VERSION=${BASH_REMATCH[1]} - fi - done +if [ "$#" -gt "1" ]; then + echo "Only accept 0 or 1 argument. Use -h to see examples." + exit 1 fi -########################### +declare -a ALL_VERSION_OPTS=( +"-Dscala-2.11 -Dspark2.4 -Dflink1.13" +"-Dscala-2.12 -Dspark2.4 -Dflink1.13" +"-Dscala-2.12 -Dspark3.1 -Dflink1.14" +"-Dscala-2.12 -Dspark3.2 -Dflink1.14" +) +printf -v joined "'%s'\n" "${ALL_VERSION_OPTS[@]}" + +if [ "${1:-}" == "-h" ]; then + echo " +Usage: $(basename "$0") [OPTIONS] + +Options: + One of the version options below +${joined} +-h, --help +" + exit 0 +fi -cd .. 
+VERSION_OPT=${1:-} +valid_version_opt=false +for v in "${ALL_VERSION_OPTS[@]}"; do + [[ $VERSION_OPT == "$v" ]] && valid_version_opt=true +done -echo "Deploying to repository.apache.org with scala version ${SCALA_VERSION}" +if [ "$valid_version_opt" = true ]; then + # run deploy for only specified version option + ALL_VERSION_OPTS=("$VERSION_OPT") +elif [ "$#" == "1" ]; then + echo "Version option $VERSION_OPT is invalid. Use -h to see examples." + exit 1 +fi -COMMON_OPTIONS="-Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -Prelease -DskipTests -DretryFailedDeploymentCount=10 -DdeployArtifacts=true" -$MVN clean deploy $COMMON_OPTIONS +for v in "${ALL_VERSION_OPTS[@]}" +do + echo "Deploying to repository.apache.org with version option ${v}" + COMMON_OPTIONS="${v} -DdeployArtifacts=true -DskipTests -DretryFailedDeploymentCount=10" + $MVN clean deploy $COMMON_OPTIONS +done diff --git a/scripts/release/validate_staged_release.sh b/scripts/release/validate_staged_release.sh index 681cc2018637f..0e027442570a5 100755 --- a/scripts/release/validate_staged_release.sh +++ b/scripts/release/validate_staged_release.sh @@ -124,7 +124,7 @@ numBinaryFiles=`find . -iname '*' | xargs -I {} file -I {} | grep -va directory if [ "$numBinaryFiles" -gt "0" ]; then echo -e "There were non-text files in source release. Please check below\n" find . -iname '*' | xargs -I {} file -I {} | grep -va directory | grep -v "/src/test/" | grep -va 'application/json' | grep -va 'text/' | grep -va 'application/xml' - exit -1 + exit 1 fi echo -e "\t\tNo Binary Files in Source Release? - [OK]\n" ### END: Binary Files Check @@ -134,7 +134,7 @@ echo "Checking for DISCLAIMER" disclaimerFile="./DISCLAIMER" if [ -f "$disclaimerFile" ]; then echo "DISCLAIMER file should not be present " - exit -1 + exit 1 fi echo -e "\t\tDISCLAIMER file exists ? [OK]\n" @@ -144,23 +144,23 @@ licenseFile="./LICENSE" noticeFile="./NOTICE" if [ ! 
-f "$licenseFile" ]; then echo "License file missing" - exit -1 + exit 1 fi echo -e "\t\tLicense file exists ? [OK]" if [ ! -f "$noticeFile" ]; then echo "Notice file missing" - exit -1 + exit 1 fi echo -e "\t\tNotice file exists ? [OK]\n" ### Licensing Check echo "Performing custom Licensing Check " -numfilesWithNoLicense=`find . -iname '*' -type f | grep -v NOTICE | grep -v LICENSE | grep -v '.json' | grep -v '.data'| grep -v '.commit' | grep -v DISCLAIMER | grep -v KEYS | grep -v '.mailmap' | grep -v '.sqltemplate' | grep -v 'ObjectSizeCalculator.java' | grep -v 'AvroConversionHelper.scala' | grep -v "fixtures" | xargs grep -L "Licensed to the Apache Software Foundation (ASF)" | wc -l` +numfilesWithNoLicense=`find . -iname '*' -type f | grep -v NOTICE | grep -v LICENSE | grep -v '.json' | grep -v '.hfile' | grep -v '.data' | grep -v '.commit' | grep -v DISCLAIMER | grep -v KEYS | grep -v '.mailmap' | grep -v '.sqltemplate' | grep -v 'ObjectSizeCalculator.java' | grep -v 'AvroConversionHelper.scala' | grep -v "fixtures" | xargs grep -L "Licensed to the Apache Software Foundation (ASF)" | wc -l` if [ "$numfilesWithNoLicense" -gt "0" ]; then echo "There were some source files that did not have Apache License" - find . -iname '*' -type f | grep -v NOTICE | grep -v LICENSE | grep -v '.json' | grep -v '.data' | grep -v '.commit' | grep -v DISCLAIMER | grep -v '.sqltemplate' | grep -v KEYS | grep -v '.mailmap' | grep -v 'ObjectSizeCalculator.java' | grep -v 'AvroConversionHelper.scala' | grep -v "fixtures" | xargs grep -L "Licensed to the Apache Software Foundation (ASF)" - exit -1 + find . 
-iname '*' -type f | grep -v NOTICE | grep -v LICENSE | grep -v '.json' | grep -v '.hfile' | grep -v '.data' | grep -v '.commit' | grep -v DISCLAIMER | grep -v '.sqltemplate' | grep -v KEYS | grep -v '.mailmap' | grep -v 'ObjectSizeCalculator.java' | grep -v 'AvroConversionHelper.scala' | grep -v "fixtures" | xargs grep -L "Licensed to the Apache Software Foundation (ASF)" + exit 1 fi echo -e "\t\tLicensing Check Passed [OK]\n"