5 changes: 5 additions & 0 deletions hudi-common/pom.xml
@@ -84,6 +84,11 @@
<import>${basedir}/src/main/avro/HoodieBootstrapSourceFilePartitionInfo.avsc</import>
<import>${basedir}/src/main/avro/HoodieBootstrapIndexInfo.avsc</import>
<import>${basedir}/src/main/avro/HoodieBootstrapMetadata.avsc</import>
<import>${basedir}/src/main/avro/HoodieSliceInfo.avsc</import>
<import>${basedir}/src/main/avro/HoodieClusteringGroup.avsc</import>
<import>${basedir}/src/main/avro/HoodieClusteringStrategy.avsc</import>
<import>${basedir}/src/main/avro/HoodieClusteringPlan.avsc</import>
<import>${basedir}/src/main/avro/HoodieRequestedReplaceMetadata.avsc</import>
</imports>
</configuration>
</plugin>
49 changes: 49 additions & 0 deletions hudi-common/src/main/avro/HoodieClusteringGroup.avsc
@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
"namespace":"org.apache.hudi.avro.model",
"type":"record",
"name":"HoodieClusteringGroup",
"type":"record",
"fields":[
{
/* Group of files that need to be merged. All the slices in a group will initially belong to the same partition.
 * Files of different partitions may be grouped later, once we have a better on-disk layout with indexing support.
 */
"name":"slices",
"type":["null", {
"type":"array",
"items": "HoodieSliceInfo"
}],
"default": null
},
{
"name":"metrics",
"type":["null", {
"type":"map",
"values":"double"
}],
"default": null
},
{
"name":"version",
"type":["int", "null"],
"default": 1
}
]
}
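As a quick illustration of how these records fit together, here is a hedged sketch that populates a group through the builders avro-maven-plugin generates for these schemas (all field values are made up):

import java.util.Collections;
import org.apache.hudi.avro.model.HoodieClusteringGroup;
import org.apache.hudi.avro.model.HoodieSliceInfo;

// Describe one file slice to be clustered (values are illustrative).
HoodieSliceInfo slice = HoodieSliceInfo.newBuilder()
    .setFileId("fg-0001")
    .setPartitionPath("2020/10/01")
    .setDataFilePath("/tbl/2020/10/01/fg-0001_0-0-0_20201001.parquet")
    .setDeltaFilePaths(Collections.emptyList())
    .setVersion(1)
    .build();

// Group the slices; all slices in a group start out in the same partition.
HoodieClusteringGroup group = HoodieClusteringGroup.newBuilder()
    .setSlices(Collections.singletonList(slice))
    .setMetrics(Collections.singletonMap("TOTAL_IO_READ_MB", 512.0)) // metric key is hypothetical
    .setVersion(1)
    .build();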
50 changes: 50 additions & 0 deletions hudi-common/src/main/avro/HoodieClusteringPlan.avsc
@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
"namespace":"org.apache.hudi.avro.model",
"type":"record",
"name":"HoodieClusteringPlan",
"fields":[
{
"name":"inputGroups",
"type":["null", {
"type":"array",
"items": "HoodieClusteringGroup"
}],
"default": null
},
{
"name":"strategy",
"type":["HoodieClusteringStrategy", "null"],
"default": null
},
{
"name":"extraMetadata",
"type":["null", {
"type":"map",
"values":"string"
}],
"default": null
},
{
"name":"version",
"type":["int", "null"],
"default": 1
}
]
}
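Continuing the sketch above, a plan just bundles the input groups with a strategy (built in the strategy sketch after the next file) plus optional free-form metadata:

HoodieClusteringPlan plan = HoodieClusteringPlan.newBuilder()
    .setInputGroups(Collections.singletonList(group)) // from the group sketch above
    .setStrategy(strategy)                            // see the strategy sketch below
    .setExtraMetadata(Collections.emptyMap())
    .setVersion(1)
    .build();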
42 changes: 42 additions & 0 deletions hudi-common/src/main/avro/HoodieClusteringStrategy.avsc
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
"namespace":"org.apache.hudi.avro.model",
Contributor: At a high level, I don't think we need a clustering strategy avsc for this; we can just drive it from reflection and class names, like CompactionPlans?

Member (author): There needs to be coordination between scheduling and executing clustering. For example, when scheduling we specify parameters such as 'sortColumns', 'targetFileSize', etc. These need to be passed to the async job that clusters the data, so they are serialized and stored in the strategy/plan.

Contributor: Serializing parameters such as 'sortColumns' and 'targetFileSize' to the clustering job makes sense to me; my question is about the strategy class name itself being written to the file. Is it just for tracking the strategy, or will the async clustering job actually use this strategy to read out the strategyParams?

Member (author): Yes, the async clustering job would read the strategy to instantiate the class. That strategy object will be used to write the clustered records (for example, the strategy may use a custom partitioner to respect sortColumns).

"name":"HoodieClusteringStrategy",
"type":"record",
"fields":[
{
"name":"strategyClassName", /* have to be subclass of ClusteringStrategy interface defined in hudi. ClusteringStrategy class include methods like getPartitioner */
"type":["null","string"],
"default": null
},
{
"name":"strategyParams", /* Parameters could be different for different strategies. example, if sorting is needed for the strategy, parameters can contain sortColumns */
"type":["null", {
"type":"map",
"values":"string"
}],
"default": null
},
{
"name":"version",
"type":["int", "null"],
"default": 1
}
]
}
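To make the scheduling/execution handshake from the review thread concrete, here is a hedged sketch of a scheduler serializing a strategy; the class name and parameter keys are hypothetical, not defined by this PR:

import java.util.HashMap;
import java.util.Map;
import org.apache.hudi.avro.model.HoodieClusteringStrategy;

Map<String, String> strategyParams = new HashMap<>();
strategyParams.put("sortColumns", "city,ts");                              // hypothetical key
strategyParams.put("targetFileSize", String.valueOf(1024L * 1024 * 1024)); // hypothetical key

HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder()
    .setStrategyClassName("com.example.SortingClusteringStrategy") // hypothetical implementation
    .setStrategyParams(strategyParams)
    .setVersion(1)
    .build();

// The async clustering job later reads strategyClassName back from the plan,
// instantiates it via reflection, and hands it the strategyParams.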
47 changes: 47 additions & 0 deletions hudi-common/src/main/avro/HoodieRequestedReplaceMetadata.avsc
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
"namespace":"org.apache.hudi.avro.model",
Contributor: Same comment: just roll up all the data structures you need into the HoodieClusteringPlan. Is it possible to merge the clustering plan with the insert-overwrite plan?

Member (author): I think this model is flexible for future operations. Replace could be used for other operations such as column pruning and many other use cases. Trying to fit metadata for all these special operations into one format may cause problems (insert-overwrite doesn't really have a plan today, so we would have to generate a plan just for the sake of creating it). We also don't want to introduce other top-level commits because that is a lot of work, so I want to keep this as generic as possible.

Contributor: Keeping the model flexible sounds fair to me; I'm not following how that is connected to introducing other top-level commits. Could you please expand?

Member (author): Initially we were considering replacecommit and clusteringcommit as separate things. The only reason not to introduce a new commit type is to keep it simple, as the post-commit metadata looks very similar. I was trying to say that if these were separate top-level commits, this file would not be needed.

"type":"record",
"name":"HoodieRequestedReplaceMetadata",
"fields":[
{
"name":"operationType",
"type":["null", "string"],
"default": ""
},
{
"name":"clusteringPlan", /* only set if operationType == clustering" */
"type":["HoodieClusteringPlan", "null"],
"default": null
},
{
"name":"extraMetadata",
"type":["null", {
"type":"map",
"values":"string"
}],
"default": null
},
{
"name":"version",
"type":["int", "null"],
"default": 1
}
]
}
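Putting the pieces together, a hedged sketch of the requested replace metadata written when clustering is scheduled, reusing plan from the earlier sketches (the operation-type string mirrors WriteOperationType.CLUSTER added below):

HoodieRequestedReplaceMetadata requested = HoodieRequestedReplaceMetadata.newBuilder()
    .setOperationType("cluster") // value of WriteOperationType.CLUSTER
    .setClusteringPlan(plan)
    .setVersion(1)
    .build();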
56 changes: 56 additions & 0 deletions hudi-common/src/main/avro/HoodieSliceInfo.avsc
@@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
"namespace":"org.apache.hudi.avro.model",
"type":"record",
"name":"HoodieSliceInfo",
"fields":[
{
"name":"dataFilePath",
"type":["null","string"],
"default": null
},
{
"name":"deltaFilePaths",
"type":["null", {
"type":"array",
"items":"string"
}],
"default": null
},
{
"name":"fileId",
"type":["null","string"]
},
{
"name":"partitionPath",
"type":["null","string"],
"default": null
},
{
"name":"bootstrapFilePath",
"type":["null", "string"],
"default": null
},
{
"name":"version",
Member: Can we add this as the first field (so there are no issues with reordering, etc.)? We should always be able to read it.

Member: Realized we can achieve this by passing writer/reader schemas, and there are places where this is not being done uniformly. So treat this more as a nit.

"type":["int", "null"],
"default": 1
}
]
}
@@ -40,6 +40,8 @@ public enum WriteOperationType {
BOOTSTRAP("bootstrap"),
// insert overwrite
INSERT_OVERWRITE("insert_overwrite"),
// cluster
CLUSTER("cluster"),
// used for old version
UNKNOWN("unknown");

@@ -412,6 +412,14 @@ public void saveToCompactionRequested(HoodieInstant instant, Option<byte[]> cont
createFileInMetaPath(instant.getFileName(), content, overwrite);
}

/**
 * Saves content for an inflight/requested REPLACE instant.
*/
public void saveToPendingReplaceCommit(HoodieInstant instant, Option<byte[]> content) {
ValidationUtils.checkArgument(instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION));
createFileInMetaPath(instant.getFileName(), content, false);
}
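
For example, a scheduler might persist the requested metadata from the earlier sketches like this (the timestamp is made up, and activeTimeline is an assumed HoodieActiveTimeline handle):

HoodieInstant requestedInstant = new HoodieInstant(
    State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, "20201001120000");
// serializeRequestedReplaceMetadata is added to TimelineMetadataUtils later in this PR.
activeTimeline.saveToPendingReplaceCommit(
    requestedInstant, TimelineMetadataUtils.serializeRequestedReplaceMetadata(requested));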

public void saveToCleanRequested(HoodieInstant instant, Option<byte[]> content) {
ValidationUtils.checkArgument(instant.getAction().equals(HoodieTimeline.CLEAN_ACTION));
ValidationUtils.checkArgument(instant.getState().equals(State.REQUESTED));
@@ -118,6 +118,12 @@ public HoodieTimeline getCompletedReplaceTimeline() {
instants.stream().filter(s -> s.getAction().equals(REPLACE_COMMIT_ACTION)).filter(s -> s.isCompleted()), details);
}

@Override
public HoodieTimeline filterPendingReplaceTimeline() {
return new HoodieDefaultTimeline(instants.stream().filter(
s -> s.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION) && !s.isCompleted()), details);
}
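
A usage sketch: before scheduling new clustering, a writer could check for in-flight replace instants (metaClient is an assumed HoodieTableMetaClient):

Option<HoodieInstant> earliestPendingReplace =
    metaClient.getActiveTimeline().filterPendingReplaceTimeline().firstInstant();
if (earliestPendingReplace.isPresent()) {
  // a replacecommit is still requested/inflight; skip or defer scheduling
}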

@Override
public HoodieTimeline filterPendingCompactionTimeline() {
return new HoodieDefaultTimeline(
@@ -151,6 +151,12 @@ public interface HoodieTimeline extends Serializable {
*/
HoodieTimeline filterPendingCompactionTimeline();

/**
* Filter this timeline to just include requested and inflight replacecommit instants.
*/
HoodieTimeline filterPendingReplaceTimeline();

/**
* Create a new Timeline with all the instants after startTs.
*/
@@ -18,10 +18,20 @@

package org.apache.hudi.common.table.timeline;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.FileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieInstantInfo;
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata;
@@ -31,16 +41,6 @@
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.FileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecordBase;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Collections;
@@ -115,6 +115,10 @@ public static Option<byte[]> serializeRestoreMetadata(HoodieRestoreMetadata rest
return serializeAvroMetadata(restoreMetadata, HoodieRestoreMetadata.class);
}

public static Option<byte[]> serializeRequestedReplaceMetadata(HoodieRequestedReplaceMetadata requestedReplaceMetadata) throws IOException {
return serializeAvroMetadata(requestedReplaceMetadata, HoodieRequestedReplaceMetadata.class);
}

public static <T extends SpecificRecordBase> Option<byte[]> serializeAvroMetadata(T metadata, Class<T> clazz)
throws IOException {
DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
@@ -146,6 +150,10 @@ public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[]
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
}

public static HoodieRequestedReplaceMetadata deserializeRequestedReplaceMetadata(byte[] bytes) throws IOException {
return deserializeAvroMetadata(bytes, HoodieRequestedReplaceMetadata.class);
}
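
A quick round-trip sketch through the two helpers added in this file, reusing requested from the earlier sketches:

Option<byte[]> bytes = TimelineMetadataUtils.serializeRequestedReplaceMetadata(requested);
HoodieRequestedReplaceMetadata roundTripped =
    TimelineMetadataUtils.deserializeRequestedReplaceMetadata(bytes.get());
// roundTripped carries the same clustering plan, strategy, and groups that were scheduled.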

public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
throws IOException {
DatumReader<T> reader = new SpecificDatumReader<>(clazz);