-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-3350][HUDI-3351] Support HoodieMerge API and Spark engine-specific HoodieRecord #5627
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a481735
df939cc
c8c4312
aec3a3a
d727e45
69c48a0
ebd49ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,7 @@ | |
| import org.apache.hudi.common.engine.EngineType; | ||
| import org.apache.hudi.common.fs.ConsistencyGuardConfig; | ||
| import org.apache.hudi.common.fs.FileSystemRetryConfig; | ||
| import org.apache.hudi.common.model.HoodieAvroRecordMerge; | ||
| import org.apache.hudi.common.model.HoodieCleaningPolicy; | ||
| import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; | ||
| import org.apache.hudi.common.model.HoodieFileFormat; | ||
|
|
@@ -123,6 +124,12 @@ public class HoodieWriteConfig extends HoodieConfig { | |
| .withDocumentation("Payload class used. Override this, if you like to roll your own merge logic, when upserting/inserting. " | ||
| + "This will render any value set for PRECOMBINE_FIELD_OPT_VAL in-effective"); | ||
|
|
||
| public static final ConfigProperty<String> MERGE_CLASS_NAME = ConfigProperty | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can make this just
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We are mainly consistent with payload, which is
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would suggest to use |
||
| .key("hoodie.datasource.write.merge.class") | ||
| .defaultValue(HoodieAvroRecordMerge.class.getName()) | ||
| .withDocumentation("Merge class provide stateless component interface for merging records, and support various HoodieRecord " | ||
| + "types, such as Spark records or Flink records."); | ||
|
|
||
| public static final ConfigProperty<String> KEYGENERATOR_CLASS_NAME = ConfigProperty | ||
| .key("hoodie.datasource.write.keygenerator.class") | ||
| .noDefaultValue() | ||
|
|
@@ -1324,6 +1331,10 @@ public String getPayloadClass() { | |
| return getString(HoodieCompactionConfig.PAYLOAD_CLASS_NAME); | ||
| } | ||
|
|
||
| public String getMergeClass() { | ||
| return getString(HoodieCompactionConfig.MERGE_CLASS_NAME); | ||
| } | ||
|
|
||
| public int getTargetPartitionsPerDayBasedCompaction() { | ||
| return getInt(HoodieCompactionConfig.TARGET_PARTITIONS_PER_DAYBASED_COMPACTION); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,7 +27,9 @@ | |
| import org.apache.hudi.common.engine.TaskContextSupplier; | ||
| import org.apache.hudi.common.fs.FSUtils; | ||
| import org.apache.hudi.common.model.HoodieRecord; | ||
| import org.apache.hudi.common.model.HoodieMerge; | ||
| import org.apache.hudi.common.model.IOType; | ||
| import org.apache.hudi.common.util.HoodieRecordUtils; | ||
| import org.apache.hudi.common.util.HoodieTimer; | ||
| import org.apache.hudi.common.util.Option; | ||
| import org.apache.hudi.common.util.ReflectionUtils; | ||
|
|
@@ -59,6 +61,7 @@ public abstract class HoodieWriteHandle<T, I, K, O> extends HoodieIOHandle<T, I, | |
| */ | ||
| protected final Schema tableSchema; | ||
| protected final Schema tableSchemaWithMetaFields; | ||
| protected final HoodieMerge merge; | ||
|
|
||
| /** | ||
| * The write schema. In most case the write schema is the same to the | ||
|
|
@@ -103,6 +106,7 @@ protected HoodieWriteHandle(HoodieWriteConfig config, String instantTime, String | |
| this.taskContextSupplier = taskContextSupplier; | ||
| this.writeToken = makeWriteToken(); | ||
| schemaOnReadEnabled = !isNullOrEmpty(hoodieTable.getConfig().getInternalSchema()); | ||
| this.merge = HoodieRecordUtils.loadMerge(config.getMergeClass()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Merge is a verb; I would suggest
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @xushiyan @vinothchandar @alexeykudinkin @danny0405 I think there are differences on the name of the Hoodie merge API. I created a JIRA, HUDI-4380, and we can make it clear. cc @wzx140 @minihippo |
||
| } | ||
|
|
||
| /** | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.