-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-3855] Fixing FILENAME_METADATA_FIELD not being correctly updated in HoodieMergeHandle
#5296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
59c685f
f7423a3
7133eaf
e5d5668
c292f06
9458d84
98d3451
df54e1d
0028d5a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,7 @@ | |
| import org.apache.hudi.common.engine.TaskContextSupplier; | ||
| import org.apache.hudi.common.fs.FSUtils; | ||
| import org.apache.hudi.common.model.HoodieBaseFile; | ||
| import org.apache.hudi.common.model.HoodieKey; | ||
| import org.apache.hudi.common.model.HoodieOperation; | ||
| import org.apache.hudi.common.model.HoodiePartitionMetadata; | ||
| import org.apache.hudi.common.model.HoodieRecord; | ||
|
|
@@ -292,13 +293,7 @@ protected boolean writeRecord(HoodieRecord<T> hoodieRecord, Option<IndexedRecord | |
| } | ||
| try { | ||
| if (indexedRecord.isPresent() && !isDelete) { | ||
| // Convert GenericRecord to GenericRecord with hoodie commit metadata in schema | ||
| if (preserveMetadata && useWriterSchemaForCompaction) { // useWriteSchema will be true only in case of compaction. | ||
| fileWriter.writeAvro(hoodieRecord.getRecordKey(), | ||
| rewriteRecordWithMetadata((GenericRecord) indexedRecord.get(), newFilePath.getName())); | ||
| } else { | ||
| fileWriter.writeAvroWithMetadata(rewriteRecord((GenericRecord) indexedRecord.get()), hoodieRecord); | ||
| } | ||
| writeToFile(hoodieRecord.getKey(), (GenericRecord) indexedRecord.get(), preserveMetadata && useWriterSchemaForCompaction); | ||
| recordsWritten++; | ||
| } else { | ||
| recordsDeleted++; | ||
|
|
@@ -352,14 +347,9 @@ public void write(GenericRecord oldRecord) { | |
| } | ||
|
|
||
| if (copyOldRecord) { | ||
| // this should work as it is, since this is an existing record | ||
| try { | ||
| // rewrite file names | ||
| // do not preserve FILENAME_METADATA_FIELD | ||
| if (preserveMetadata && useWriterSchemaForCompaction) { | ||
| oldRecord.put(HoodieRecord.FILENAME_METADATA_FIELD_POS, newFilePath.getName()); | ||
| } | ||
| fileWriter.writeAvro(key, oldRecord); | ||
| // NOTE: We're enforcing preservation of the record metadata to keep existing semantic | ||
| writeToFile(new HoodieKey(key, partitionPath), oldRecord, true); | ||
| } catch (IOException | RuntimeException e) { | ||
| String errMsg = String.format("Failed to merge old record into new file for key %s from old file %s to new file %s with writerSchema %s", | ||
| key, getOldFilePath(), newFilePath, writeSchemaWithMetaFields.toString(true)); | ||
|
|
@@ -370,6 +360,16 @@ public void write(GenericRecord oldRecord) { | |
| } | ||
| } | ||
|
|
||
| protected void writeToFile(HoodieKey key, GenericRecord avroRecord, boolean shouldPreserveRecordMetadata) throws IOException { | ||
| if (shouldPreserveRecordMetadata) { | ||
| // NOTE: `FILENAME_METADATA_FIELD` has to be rewritten to correctly point to the | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can the create handle reuse this method?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Totally. Previously it relied on
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I do want to review all handles more holistically and clean up quite a bit of the duplication and unnecessary complication that we've currently amassed.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be great if you could do that. |
||
| // file holding this record even in cases when overall metadata is preserved | ||
| fileWriter.writeAvro(key.getRecordKey(), rewriteRecordWithMetadata(avroRecord, newFilePath.getName())); | ||
| } else { | ||
| fileWriter.writeAvroWithMetadata(key, rewriteRecord(avroRecord)); | ||
| } | ||
| } | ||
|
|
||
| protected void writeIncomingRecords() throws IOException { | ||
| // write out any pending records (this can happen when inserts are turned into updates) | ||
| Iterator<HoodieRecord<T>> newRecordsItr = (keyToNewRecords instanceof ExternalSpillableMap) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.