-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-3963] Use Lock-Free Message Queue Disruptor Improving Hoodie Writing Efficiency #5416
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 22 commits
527ade2
2b096e8
19ce784
1db7936
725f5c3
71a9f13
b5843f0
4a1f01c
4b94523
69904cc
204abe2
45979df
075cab1
e87bfbc
79dcebe
e85c961
b838e1f
e00a109
74068f6
03b328e
3778cf4
8041a02
a04be3c
1b86864
6114ee2
67d8cd3
b369ccb
92760db
3e44fb2
672fdc6
c0c2274
4587303
abd8b27
4ba91d4
70ca2c7
447cb45
f0b7218
eaf0ae0
270d811
0fc24d3
4797166
1fa9ac7
e230fbb
298f66d
8ded3e8
794e30b
25fcd5d
6e56bd1
d615f1f
c748320
3f3a41a
9fe6bda
a81ffdf
fd19907
e5c17f0
c13fc35
d1970a3
988e14a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -129,6 +129,14 @@ public class HoodieWriteConfig extends HoodieConfig { | |
| .withDocumentation("Key generator class, that implements `org.apache.hudi.keygen.KeyGenerator` " | ||
| + "extract a key out of incoming records."); | ||
|
|
||
| public static final ConfigProperty<String> EXECUTOR_TYPE = ConfigProperty | ||
| .key("hoodie.write.executor.type") | ||
| .defaultValue("BOUNDED_IN_MEMORY_EXECUTOR") | ||
| .withDocumentation("Set executor which orchestrates concurrent producers and consumers communicating through a message queue." | ||
| + "default value is BOUNDED_IN_MEMORY_EXECUTOR which use a bounded in-memory queue using LinkedBlockingQueue." | ||
| + "Also users could use DISRUPTOR_EXECUTOR, which use disruptor as a lock free message queue " | ||
| + "to gain better writing performance. Although DISRUPTOR_EXECUTOR is still an experimental feature."); | ||
|
|
||
| public static final ConfigProperty<String> KEYGENERATOR_TYPE = ConfigProperty | ||
| .key("hoodie.datasource.write.keygenerator.type") | ||
| .defaultValue(KeyGeneratorType.SIMPLE.name()) | ||
|
|
@@ -235,6 +243,16 @@ public class HoodieWriteConfig extends HoodieConfig { | |
| .defaultValue(String.valueOf(4 * 1024 * 1024)) | ||
| .withDocumentation("Size of in-memory buffer used for parallelizing network reads and lake storage writes."); | ||
|
|
||
| public static final ConfigProperty<Integer> WRITE_BUFFER_SIZE = ConfigProperty | ||
| .key("hoodie.write.buffer.size") | ||
|
||
| .defaultValue(1024) | ||
| .withDocumentation("The size of the Disruptor Executor ring buffer, must be power of 2"); | ||
|
|
||
| public static final ConfigProperty<String> WRITE_WAIT_STRATEGY = ConfigProperty | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed. |
||
| .key("hoodie.write.wait.strategy") | ||
| .defaultValue("BlockingWaitStrategy") | ||
|
||
| .withDocumentation("Strategy employed for making DisruptorExecutor wait on a cursor."); | ||
|
|
||
| public static final ConfigProperty<String> COMBINE_BEFORE_INSERT = ConfigProperty | ||
| .key("hoodie.combine.before.insert") | ||
| .defaultValue("false") | ||
|
|
@@ -975,6 +993,10 @@ public String getKeyGeneratorClass() { | |
| return getString(KEYGENERATOR_CLASS_NAME); | ||
| } | ||
|
|
||
| public String getExecutorType() { | ||
| return getString(EXECUTOR_TYPE); | ||
| } | ||
|
|
||
| public boolean isConsistentLogicalTimestampEnabled() { | ||
| return getBooleanOrDefault(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED); | ||
| } | ||
|
|
@@ -1035,6 +1057,14 @@ public int getWriteBufferLimitBytes() { | |
| return Integer.parseInt(getStringOrDefault(WRITE_BUFFER_LIMIT_BYTES_VALUE)); | ||
| } | ||
|
|
||
| public String getWriteWaitStrategy() { | ||
|
||
| return getString(WRITE_WAIT_STRATEGY); | ||
| } | ||
|
|
||
| public int getWriteBufferSize() { | ||
|
||
| return getInt(WRITE_BUFFER_SIZE); | ||
| } | ||
|
|
||
| public boolean shouldCombineBeforeInsert() { | ||
| return getBoolean(COMBINE_BEFORE_INSERT); | ||
| } | ||
|
|
@@ -2205,6 +2235,11 @@ public Builder withKeyGenerator(String keyGeneratorClass) { | |
| return this; | ||
| } | ||
|
|
||
| public Builder withExecutorName(String executorClass) { | ||
|
||
| writeConfig.setValue(EXECUTOR_TYPE, executorClass); | ||
| return this; | ||
| } | ||
|
|
||
| public Builder withTimelineLayoutVersion(int version) { | ||
| writeConfig.setValue(TIMELINE_LAYOUT_VERSION_NUM, String.valueOf(version)); | ||
| return this; | ||
|
|
@@ -2251,6 +2286,16 @@ public Builder withWriteBufferLimitBytes(int writeBufferLimit) { | |
| return this; | ||
| } | ||
|
|
||
| public Builder withWriteWaitStrategy(String waitStrategy) { | ||
| writeConfig.setValue(WRITE_WAIT_STRATEGY, String.valueOf(waitStrategy)); | ||
| return this; | ||
| } | ||
|
|
||
| public Builder withWriteBufferSize(int size) { | ||
| writeConfig.setValue(WRITE_BUFFER_SIZE, String.valueOf(size)); | ||
| return this; | ||
| } | ||
|
|
||
| public Builder combineInput(boolean onInsert, boolean onUpsert) { | ||
| writeConfig.setValue(COMBINE_BEFORE_INSERT, String.valueOf(onInsert)); | ||
| writeConfig.setValue(COMBINE_BEFORE_UPSERT, String.valueOf(onUpsert)); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,9 @@ | |
| import org.apache.hudi.common.model.HoodieRecord; | ||
| import org.apache.hudi.common.model.HoodieRecordPayload; | ||
| import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor; | ||
| import org.apache.hudi.common.util.queue.DisruptorExecutor; | ||
| import org.apache.hudi.common.util.queue.ExecutorType; | ||
| import org.apache.hudi.common.util.queue.HoodieExecutor; | ||
| import org.apache.hudi.config.HoodieWriteConfig; | ||
| import org.apache.hudi.exception.HoodieException; | ||
| import org.apache.hudi.io.WriteHandleFactory; | ||
|
|
@@ -33,6 +36,7 @@ | |
|
|
||
| import java.util.Iterator; | ||
| import java.util.List; | ||
| import java.util.Locale; | ||
|
|
||
| public class SparkLazyInsertIterable<T extends HoodieRecordPayload> extends HoodieLazyInsertIterable<T> { | ||
|
|
||
|
|
@@ -77,16 +81,35 @@ public SparkLazyInsertIterable(Iterator<HoodieRecord<T>> recordItr, | |
| @Override | ||
| protected List<WriteStatus> computeNext() { | ||
| // Executor service used for launching writer thread. | ||
| BoundedInMemoryExecutor<HoodieRecord<T>, HoodieInsertValueGenResult<HoodieRecord>, List<WriteStatus>> bufferedIteratorExecutor = | ||
| null; | ||
| HoodieExecutor<?, ?, List<WriteStatus>> bufferedIteratorExecutor = null; | ||
| try { | ||
| Schema schema = new Schema.Parser().parse(hoodieConfig.getSchema()); | ||
| if (useWriterSchema) { | ||
| schema = HoodieAvroUtils.addMetadataFields(schema); | ||
| } | ||
| bufferedIteratorExecutor = | ||
| new BoundedInMemoryExecutor<>(hoodieConfig.getWriteBufferLimitBytes(), inputItr, getInsertHandler(), | ||
|
|
||
| String executorType = hoodieConfig.getExecutorType(); | ||
|
||
| ExecutorType executorTypeEnum; | ||
|
|
||
| try { | ||
| executorTypeEnum = ExecutorType.valueOf(executorType.toUpperCase(Locale.ROOT)); | ||
| } catch (IllegalArgumentException e) { | ||
| throw new HoodieException("Unsupported Executor Type " + executorType); | ||
| } | ||
|
|
||
| switch (executorTypeEnum) { | ||
| case BOUNDED_IN_MEMORY_EXECUTOR: | ||
|
||
| bufferedIteratorExecutor = new BoundedInMemoryExecutor<>(hoodieConfig.getWriteBufferLimitBytes(), inputItr, getInsertHandler(), | ||
| getTransformFunction(schema, hoodieConfig), hoodieTable.getPreExecuteRunnable()); | ||
| break; | ||
| case DISRUPTOR_EXECUTOR: | ||
| bufferedIteratorExecutor = new DisruptorExecutor<>(hoodieConfig.getWriteBufferSize(), inputItr, getInsertHandler(), | ||
| getTransformFunction(schema, hoodieConfig), hoodieConfig.getWriteWaitStrategy(), hoodieTable.getPreExecuteRunnable()); | ||
| break; | ||
| default: | ||
| throw new HoodieException("Unsupported Executor Type " + executorType); | ||
| } | ||
|
|
||
| final List<WriteStatus> result = bufferedIteratorExecutor.execute(); | ||
| assert result != null && !result.isEmpty() && !bufferedIteratorExecutor.isRemaining(); | ||
| return result; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,174 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.hudi.execution; | ||
|
|
||
| import static org.apache.hudi.execution.HoodieLazyInsertIterable.getTransformFunction; | ||
|
|
||
| import org.apache.avro.generic.IndexedRecord; | ||
| import org.apache.hudi.common.model.HoodieRecord; | ||
| import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; | ||
| import org.apache.hudi.common.testutils.HoodieTestDataGenerator; | ||
| import org.apache.hudi.common.util.Option; | ||
| import org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer; | ||
| import org.apache.hudi.common.util.queue.DisruptorExecutor; | ||
| import org.apache.hudi.common.util.queue.WaitStrategyFactory; | ||
| import org.apache.hudi.config.HoodieWriteConfig; | ||
| import org.apache.hudi.exception.HoodieException; | ||
| import org.apache.hudi.testutils.HoodieClientTestHarness; | ||
| import org.apache.spark.TaskContext; | ||
| import org.apache.spark.TaskContext$; | ||
| import org.junit.jupiter.api.AfterEach; | ||
| import org.junit.jupiter.api.BeforeEach; | ||
| import org.junit.jupiter.api.Test; | ||
|
|
||
| import java.util.List; | ||
| import java.util.concurrent.ExecutorService; | ||
| import java.util.concurrent.Executors; | ||
| import java.util.concurrent.Future; | ||
| import java.util.concurrent.atomic.AtomicReference; | ||
|
|
||
| import scala.Tuple2; | ||
|
|
||
| import static org.junit.jupiter.api.Assertions.assertEquals; | ||
| import static org.junit.jupiter.api.Assertions.assertFalse; | ||
| import static org.junit.jupiter.api.Assertions.assertTrue; | ||
| import static org.mockito.Mockito.mock; | ||
| import static org.mockito.Mockito.when; | ||
|
|
||
| public class TestDisruptorExecutionInSpark extends HoodieClientTestHarness { | ||
|
|
||
| private final String instantTime = HoodieActiveTimeline.createNewInstantTime(); | ||
|
|
||
| @BeforeEach | ||
| public void setUp() throws Exception { | ||
| initTestDataGenerator(); | ||
| initExecutorServiceWithFixedThreadPool(2); | ||
| } | ||
|
|
||
| @AfterEach | ||
| public void tearDown() throws Exception { | ||
| cleanupResources(); | ||
| } | ||
|
|
||
| private Runnable getPreExecuteRunnable() { | ||
| final TaskContext taskContext = TaskContext.get(); | ||
| return () -> TaskContext$.MODULE$.setTaskContext(taskContext); | ||
| } | ||
|
|
||
| @Test | ||
| public void testExecutor() { | ||
|
|
||
| final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, 128); | ||
|
|
||
| HoodieWriteConfig hoodieWriteConfig = mock(HoodieWriteConfig.class); | ||
| when(hoodieWriteConfig.getWriteBufferSize()).thenReturn(8); | ||
| BoundedInMemoryQueueConsumer<HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord>, Integer> consumer = | ||
| new BoundedInMemoryQueueConsumer<HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord>, Integer>() { | ||
|
|
||
| private int count = 0; | ||
|
|
||
| @Override | ||
| protected void consumeOneRecord(HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord> record) { | ||
| count++; | ||
alexeykudinkin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| @Override | ||
| protected void finish() { | ||
| } | ||
|
|
||
| @Override | ||
| protected Integer getResult() { | ||
| return count; | ||
| } | ||
| }; | ||
| DisruptorExecutor<HoodieRecord, Tuple2<HoodieRecord, Option<IndexedRecord>>, Integer> exec = null; | ||
|
|
||
| try { | ||
| exec = new DisruptorExecutor(hoodieWriteConfig.getWriteBufferSize(), hoodieRecords.iterator(), consumer, | ||
| getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA), WaitStrategyFactory.DEFAULT_STRATEGY, getPreExecuteRunnable()); | ||
| int result = exec.execute(); | ||
| // It should buffer and write 100 records | ||
alexeykudinkin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| assertEquals(128, result); | ||
| // There should be no remaining records in the buffer | ||
| assertFalse(exec.isRemaining()); | ||
| } finally { | ||
| if (exec != null) { | ||
| exec.shutdownNow(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| @Test | ||
| public void testInterruptExecutor() { | ||
| final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, 100); | ||
| ExecutorService pool = Executors.newSingleThreadExecutor(); | ||
|
|
||
| HoodieWriteConfig hoodieWriteConfig = mock(HoodieWriteConfig.class); | ||
| when(hoodieWriteConfig.getWriteBufferSize()).thenReturn(1024); | ||
| BoundedInMemoryQueueConsumer<HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord>, Integer> consumer = | ||
| new BoundedInMemoryQueueConsumer<HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord>, Integer>() { | ||
|
|
||
| @Override | ||
| protected void consumeOneRecord(HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord> record) { | ||
| try { | ||
| while (true) { | ||
| Thread.sleep(1000); | ||
|
||
| } | ||
| } catch (InterruptedException ie) { | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| protected void finish() { | ||
| } | ||
|
|
||
| @Override | ||
| protected Integer getResult() { | ||
| return 0; | ||
| } | ||
| }; | ||
|
|
||
| DisruptorExecutor<HoodieRecord, Tuple2<HoodieRecord, Option<IndexedRecord>>, Integer> executor = null; | ||
| AtomicReference<Exception> actualException = new AtomicReference<>(); | ||
| try { | ||
| executor = new DisruptorExecutor(hoodieWriteConfig.getWriteBufferSize(), hoodieRecords.iterator(), consumer, | ||
| getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA), WaitStrategyFactory.DEFAULT_STRATEGY, getPreExecuteRunnable()); | ||
| DisruptorExecutor<HoodieRecord, Tuple2<HoodieRecord, Option<IndexedRecord>>, Integer> finalExecutor = executor; | ||
|
||
|
|
||
| Future<?> future = pool.submit(() -> { | ||
| try { | ||
| finalExecutor.execute(); | ||
| } catch (Exception e) { | ||
| actualException.set(e); | ||
| } | ||
|
|
||
| }); | ||
| future.cancel(true); | ||
| future.get(); | ||
| assertTrue(actualException.get() instanceof HoodieException); | ||
|
||
| } catch (Exception e) { | ||
| // ignore here | ||
| } finally { | ||
| if (executor != null) { | ||
| executor.shutdownNow(); | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we introduce enum for this. also, we can remove "EXECUTOR" suffix from it. for eg, in case of Key gen, enum is named as KeyGeneratorType and value is just "SIMPLE". we don't call it as "SIMPOLE_KEY_GEN" as its repetitive.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure thing. Changed!