diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
index 2fbd71dc6ab26..41b0a107c2c44 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
@@ -301,6 +301,7 @@ private String getDefaultIndexType(EngineType engineType) {
case SPARK:
return HoodieIndex.IndexType.BLOOM.name();
case FLINK:
+ case JAVA:
return HoodieIndex.IndexType.INMEMORY.name();
default:
throw new HoodieNotSupportedException("Unsupported engine " + engineType);
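
With this change the Java engine falls back to the in-memory index by default instead of hitting the unsupported-engine exception. A minimal sketch of a Java-client write config that relies on this default, assuming the standard HoodieWriteConfig/HoodieIndexConfig builders expose withEngineType (base path and schema string are placeholders):

```java
import org.apache.hudi.common.engine.EngineType;
import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieWriteConfig;

public class JavaEngineDefaultIndexExample {
  // Hypothetical helper: builds a Java-engine write config without naming an index type,
  // so getDefaultIndexType(EngineType.JAVA) resolves it to INMEMORY.
  static HoodieWriteConfig buildConfig(String basePath, String avroSchema) {
    return HoodieWriteConfig.newBuilder()
        .withEngineType(EngineType.JAVA)
        .withPath(basePath)                                      // placeholder base path
        .withSchema(avroSchema)                                  // placeholder Avro schema string
        .withIndexConfig(HoodieIndexConfig.newBuilder().build()) // no explicit index type
        .build();
  }
}
```
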
diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml
index 0ef741f924772..8a020c0e83195 100644
--- a/hudi-client/hudi-java-client/pom.xml
+++ b/hudi-client/hudi-java-client/pom.xml
@@ -66,6 +66,19 @@
       <version>${project.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>${hive.groupid}</groupId>
+      <artifactId>hive-exec</artifactId>
+      <version>${hive.version}</version>
+      <scope>test</scope>
+      <classifier>${hive.exec.classifier}</classifier>
+    </dependency>
+    <dependency>
+      <groupId>${hive.groupid}</groupId>
+      <artifactId>hive-metastore</artifactId>
+      <version>${hive.version}</version>
+      <scope>test</scope>
+    </dependency>
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaBulkInsertInternalPartitionerFactory.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaBulkInsertInternalPartitionerFactory.java
new file mode 100644
index 0000000000000..62523d3399054
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaBulkInsertInternalPartitionerFactory.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.execution.bulkinsert;
+
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.table.BulkInsertPartitioner;
+
+/**
+ * A factory to generate the built-in partitioners used to arrange input records
+ * for the bulk insert operation in the Java client.
+ */
+public abstract class JavaBulkInsertInternalPartitionerFactory {
+
+ public static BulkInsertPartitioner get(BulkInsertSortMode sortMode) {
+ switch (sortMode) {
+ case NONE:
+ return new JavaNonSortPartitioner();
+ case GLOBAL_SORT:
+ return new JavaGlobalSortPartitioner();
+ default:
+ throw new HoodieException("The bulk insert sort mode \"" + sortMode.name()
+ + "\" is not supported in java client.");
+ }
+ }
+}
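
For context, a short sketch of how a caller might resolve one of these partitioners from the configured sort mode; the write-config getter shown here is an assumption for illustration:

```java
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode;
import org.apache.hudi.execution.bulkinsert.JavaBulkInsertInternalPartitionerFactory;
import org.apache.hudi.table.BulkInsertPartitioner;

public class PartitionerLookupExample {
  // Resolves the bulk-insert partitioner for the Java client from the configured sort mode.
  // Assumes config.getBulkInsertSortMode() returns the mode name as a String.
  static BulkInsertPartitioner resolve(HoodieWriteConfig config) {
    BulkInsertSortMode mode =
        BulkInsertSortMode.valueOf(config.getBulkInsertSortMode().toUpperCase());
    return JavaBulkInsertInternalPartitionerFactory.get(mode); // NONE or GLOBAL_SORT only
  }
}
```
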
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaGlobalSortPartitioner.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaGlobalSortPartitioner.java
new file mode 100644
index 0000000000000..fded0ffab51bd
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaGlobalSortPartitioner.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.execution.bulkinsert;
+
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.table.BulkInsertPartitioner;
+
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * A built-in partitioner that globally sorts the input records by partition path and
+ * record key for the bulk insert operation, corresponding to the
+ * {@code BulkInsertSortMode.GLOBAL_SORT} mode.
+ *
+ * @param <T> HoodieRecordPayload type
+ */
+public class JavaGlobalSortPartitioner<T extends HoodieRecordPayload>
+    implements BulkInsertPartitioner<List<HoodieRecord<T>>> {
+
+ @Override
+  public List<HoodieRecord<T>> repartitionRecords(List<HoodieRecord<T>> records,
+                                                  int outputSparkPartitions) {
+ // Now, sort the records and line them up nicely for loading.
+ records.sort(new Comparator() {
+ @Override
+ public int compare(Object o1, Object o2) {
+ HoodieRecord o11 = (HoodieRecord) o1;
+ HoodieRecord o22 = (HoodieRecord) o2;
+ String left = new StringBuilder()
+ .append(o11.getPartitionPath())
+ .append("+")
+ .append(o11.getRecordKey())
+ .toString();
+ String right = new StringBuilder()
+ .append(o22.getPartitionPath())
+ .append("+")
+ .append(o22.getRecordKey())
+ .toString();
+ return left.compareTo(right);
+ }
+ });
+ return records;
+ }
+
+ @Override
+ public boolean arePartitionRecordsSorted() {
+ return true;
+ }
+}
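
As a usage sketch, the partitioner can be applied directly to an in-memory record list; records come out ordered by the concatenated "partitionPath+recordKey" string, so each partition's keys are written in sorted order. The payload type and helper method below are illustrative assumptions, not part of the patch:

```java
import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.execution.bulkinsert.JavaGlobalSortPartitioner;

import java.util.List;

public class GlobalSortExample {
  // Hypothetical helper: orders records exactly as the Java bulk insert path would
  // before writing, i.e. lexicographically by "partitionPath+recordKey".
  static List<HoodieRecord<HoodieAvroPayload>> sortForBulkInsert(
      List<HoodieRecord<HoodieAvroPayload>> records) {
    // The numeric argument is the requested output partition count; this partitioner
    // ignores it and simply sorts the single in-memory list.
    return new JavaGlobalSortPartitioner<HoodieAvroPayload>()
        .repartitionRecords(records, 1);
  }
}
```
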
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaNonSortPartitioner.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaNonSortPartitioner.java
new file mode 100644
index 0000000000000..b40459d838444
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaNonSortPartitioner.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.execution.bulkinsert;
+
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.table.BulkInsertPartitioner;
+
+import java.util.List;
+
+/**
+ * A built-in partitioner that passes input records through unchanged for the bulk insert
+ * operation, corresponding to the {@code BulkInsertSortMode.NONE} mode.
+ *
+ * @param <T> HoodieRecordPayload type
+ */
+public class JavaNonSortPartitioner<T extends HoodieRecordPayload>
+    implements BulkInsertPartitioner<List<HoodieRecord<T>>> {
+
+ @Override
+  public List<HoodieRecord<T>> repartitionRecords(List<HoodieRecord<T>> records,
+                                                  int outputPartitions) {
+ return records;
+ }
+
+ @Override
+ public boolean arePartitionRecordsSorted() {
+ return false;
+ }
+}
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java
index 9895df3a3e254..157e11a55d6de 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java
@@ -39,10 +39,17 @@
import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
import org.apache.hudi.table.action.clean.JavaCleanActionExecutor;
import org.apache.hudi.table.action.commit.JavaDeleteCommitActionExecutor;
+import org.apache.hudi.table.action.commit.JavaBulkInsertCommitActionExecutor;
+import org.apache.hudi.table.action.commit.JavaBulkInsertPreppedCommitActionExecutor;
import org.apache.hudi.table.action.commit.JavaInsertCommitActionExecutor;
+import org.apache.hudi.table.action.commit.JavaInsertOverwriteCommitActionExecutor;
+import org.apache.hudi.table.action.commit.JavaInsertOverwriteTableCommitActionExecutor;
import org.apache.hudi.table.action.commit.JavaInsertPreppedCommitActionExecutor;
import org.apache.hudi.table.action.commit.JavaUpsertCommitActionExecutor;
import org.apache.hudi.table.action.commit.JavaUpsertPreppedCommitActionExecutor;
+import org.apache.hudi.table.action.restore.JavaCopyOnWriteRestoreActionExecutor;
+import org.apache.hudi.table.action.rollback.JavaCopyOnWriteRollbackActionExecutor;
+import org.apache.hudi.table.action.savepoint.SavepointActionExecutor;
import java.util.List;
import java.util.Map;
@@ -75,7 +82,8 @@ public HoodieWriteMetadata<List<WriteStatus>> bulkInsert(HoodieEngineContext con
                                                            String instantTime,
                                                            List<HoodieRecord<T>> records,
                                                            Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
- throw new HoodieNotSupportedException("BulkInsert is not supported yet");
+ return new JavaBulkInsertCommitActionExecutor((HoodieJavaEngineContext) context, config,
+ this, instantTime, records, bulkInsertPartitioner).execute();
}
@Override
@@ -112,21 +120,24 @@ public HoodieWriteMetadata<List<WriteStatus>> bulkInsertPrepped(HoodieEngineCont
                                                                  String instantTime,
                                                                  List<HoodieRecord<T>> preppedRecords,
                                                                  Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
- throw new HoodieNotSupportedException("BulkInsertPrepped is not supported yet");
+ return new JavaBulkInsertPreppedCommitActionExecutor((HoodieJavaEngineContext) context, config,
+ this, instantTime, preppedRecords, bulkInsertPartitioner).execute();
}
@Override
  public HoodieWriteMetadata<List<WriteStatus>> insertOverwrite(HoodieEngineContext context,
                                                                String instantTime,
                                                                List<HoodieRecord<T>> records) {
- throw new HoodieNotSupportedException("InsertOverwrite is not supported yet");
+ return new JavaInsertOverwriteCommitActionExecutor(
+ context, config, this, instantTime, records).execute();
}
@Override
  public HoodieWriteMetadata<List<WriteStatus>> insertOverwriteTable(HoodieEngineContext context,
                                                                     String instantTime,
                                                                     List<HoodieRecord<T>> records) {
- throw new HoodieNotSupportedException("InsertOverwrite is not supported yet");
+ return new JavaInsertOverwriteTableCommitActionExecutor(
+ context, config, this, instantTime, records).execute();
}
@Override
@@ -175,7 +186,8 @@ public HoodieRollbackMetadata rollback(HoodieEngineContext context,
String rollbackInstantTime,
HoodieInstant commitInstant,
boolean deleteInstants) {
- throw new HoodieNotSupportedException("Rollback is not supported yet");
+ return new JavaCopyOnWriteRollbackActionExecutor(
+ context, config, this, rollbackInstantTime, commitInstant, deleteInstants).execute();
}
@Override
@@ -183,13 +195,15 @@ public HoodieSavepointMetadata savepoint(HoodieEngineContext context,
String instantToSavepoint,
String user,
String comment) {
- throw new HoodieNotSupportedException("Savepoint is not supported yet");
+ return new SavepointActionExecutor(
+ context, config, this, instantToSavepoint, user, comment).execute();
}
@Override
public HoodieRestoreMetadata restore(HoodieEngineContext context,
String restoreInstantTime,
String instantToRestore) {
- throw new HoodieNotSupportedException("Restore is not supported yet");
+ return new JavaCopyOnWriteRestoreActionExecutor((HoodieJavaEngineContext) context,
+ config, this, restoreInstantTime, instantToRestore).execute();
}
}
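
With these executors wired in, the corresponding operations become reachable from the Java write client rather than throwing HoodieNotSupportedException. A rough sketch, under the assumption that HoodieJavaWriteClient exposes the usual startCommit/bulkInsert/commit/savepoint methods (the client and records passed in are placeholders):

```java
import org.apache.hudi.client.HoodieJavaWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieRecord;

import java.util.List;

public class JavaClientOperationsExample {
  // Sketch: exercise the newly supported code paths through the Java client.
  static void writeAndManage(HoodieJavaWriteClient<HoodieAvroPayload> client,
                             List<HoodieRecord<HoodieAvroPayload>> records) {
    String instant = client.startCommit();
    // Now backed by JavaBulkInsertCommitActionExecutor instead of throwing.
    List<WriteStatus> statuses = client.bulkInsert(records, instant);
    client.commit(instant, statuses);

    // Savepoint now delegates to SavepointActionExecutor; rollback and restore go through
    // JavaCopyOnWriteRollbackActionExecutor and JavaCopyOnWriteRestoreActionExecutor.
    client.savepoint("ops-user", "after bulk insert");
  }
}
```
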
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java
index 3e0b80c5d5a41..a4a6a4f92108c 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java
@@ -121,6 +121,7 @@ public HoodieWriteMetadata<List<WriteStatus>> execute(List<HoodieRecord<T>> inpu
}
});
updateIndex(writeStatuses, result);
+ updateIndexAndCommitIfNeeded(writeStatuses, result);
return result;
}
@@ -297,8 +298,7 @@ protected HoodieMergeHandle getUpdateHandle(String partitionPath, String fileId,
}
@Override
-  public Iterator<List<WriteStatus>> handleInsert(String idPfx, Iterator<HoodieRecord<T>> recordItr)
-      throws Exception {
+  public Iterator<List<WriteStatus>> handleInsert(String idPfx, Iterator<HoodieRecord<T>> recordItr) {
// This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
if (!recordItr.hasNext()) {
LOG.info("Empty partition");
@@ -325,4 +325,13 @@ public Partitioner getInsertPartitioner(WorkloadProfile profile) {
return getUpsertPartitioner(profile);
}
+  public void updateIndexAndCommitIfNeeded(List<WriteStatus> writeStatuses, HoodieWriteMetadata result) {
+    Instant indexStartTime = Instant.now();
+    // Update the index back
+    List<WriteStatus> statuses = table.getIndex().updateLocation(writeStatuses, context, table);
+    result.setIndexUpdateDuration(Duration.between(indexStartTime, Instant.now()));
+    result.setWriteStatuses(statuses);
+    result.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(statuses));
+    commitOnAutoCommit(result);
+  }
}
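
The new updateIndexAndCommitIfNeeded helper writes record locations back to the index, stamps the index-update duration on the result, and then defers to commitOnAutoCommit, so whether the commit happens inside execute() is driven by the auto-commit setting. A minimal sketch of the two call patterns, assuming the standard withAutoCommit builder flag (paths and schema are placeholders):

```java
import org.apache.hudi.config.HoodieWriteConfig;

public class AutoCommitConfigExample {
  // Auto-commit enabled (the default): commitOnAutoCommit() completes the instant
  // as part of the executor's execute() call.
  static HoodieWriteConfig autoCommit(String basePath, String avroSchema) {
    return HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withSchema(avroSchema)
        .withAutoCommit(true)
        .build();
  }

  // Auto-commit disabled: execute() still updates the index and fills in the write
  // statuses, but the caller is expected to inspect them and commit explicitly.
  static HoodieWriteConfig manualCommit(String basePath, String avroSchema) {
    return HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withSchema(avroSchema)
        .withAutoCommit(false)
        .build();
  }
}
```
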
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertCommitActionExecutor.java
new file mode 100644
index 0000000000000..9780262fb2b92
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertCommitActionExecutor.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.client.common.HoodieJavaEngineContext;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieInsertException;
+import org.apache.hudi.table.BulkInsertPartitioner;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.action.HoodieWriteMetadata;
+
+import java.util.List;
+import java.util.Map;
+
+public class JavaBulkInsertCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaCommitActionExecutor<T> {
+
+  private final List<HoodieRecord<T>> inputRecords;
+  private final Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner;
+
+ public JavaBulkInsertCommitActionExecutor(HoodieJavaEngineContext context, HoodieWriteConfig config, HoodieTable table,
+                                            String instantTime, List<HoodieRecord<T>> inputRecords,
+                                            Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
+ this(context, config, table, instantTime, inputRecords, bulkInsertPartitioner, Option.empty());
+ }
+
+ public JavaBulkInsertCommitActionExecutor(HoodieJavaEngineContext context, HoodieWriteConfig config, HoodieTable table,
+                                            String instantTime, List<HoodieRecord<T>> inputRecords,
+                                            Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner,
+ Option