-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-2952] Fixing metadata table for non-partitioned dataset #4243
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,6 +52,7 @@ | |
| import java.util.stream.Collectors; | ||
| import java.util.stream.Stream; | ||
|
|
||
| import static org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME; | ||
| import static org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME; | ||
|
|
||
| /** | ||
|
|
@@ -89,7 +90,7 @@ public static List<HoodieRecord> convertMetadataToRecords(HoodieCommitMetadata c | |
| List<HoodieRecord> records = new LinkedList<>(); | ||
| List<String> allPartitions = new LinkedList<>(); | ||
| commitMetadata.getPartitionToWriteStats().forEach((partitionStatName, writeStats) -> { | ||
| final String partition = partitionStatName.equals("") ? NON_PARTITIONED_NAME : partitionStatName; | ||
| final String partition = partitionStatName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionStatName; | ||
| allPartitions.add(partition); | ||
|
|
||
| Map<String, Long> newFiles = new HashMap<>(writeStats.size()); | ||
|
|
@@ -133,7 +134,8 @@ public static List<HoodieRecord> convertMetadataToRecords(HoodieCommitMetadata c | |
| public static List<HoodieRecord> convertMetadataToRecords(HoodieCleanMetadata cleanMetadata, String instantTime) { | ||
| List<HoodieRecord> records = new LinkedList<>(); | ||
| int[] fileDeleteCount = {0}; | ||
| cleanMetadata.getPartitionMetadata().forEach((partition, partitionMetadata) -> { | ||
| cleanMetadata.getPartitionMetadata().forEach((partitionName, partitionMetadata) -> { | ||
| final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionName; | ||
| // Files deleted from a partition | ||
| List<String> deletedFiles = partitionMetadata.getDeletePathPatterns(); | ||
| HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partition, Option.empty(), | ||
|
|
@@ -282,20 +284,22 @@ private static List<HoodieRecord> convertFilesToRecords(Map<String, List<String> | |
| List<HoodieRecord> records = new LinkedList<>(); | ||
| int[] fileChangeCount = {0, 0}; // deletes, appends | ||
|
|
||
| partitionToDeletedFiles.forEach((partition, deletedFiles) -> { | ||
| partitionToDeletedFiles.forEach((partitionName, deletedFiles) -> { | ||
| fileChangeCount[0] += deletedFiles.size(); | ||
| final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionName; | ||
|
|
||
| Option<Map<String, Long>> filesAdded = Option.empty(); | ||
| if (partitionToAppendedFiles.containsKey(partition)) { | ||
| filesAdded = Option.of(partitionToAppendedFiles.remove(partition)); | ||
| if (partitionToAppendedFiles.containsKey(partitionName)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is prone to error when refactoring the code. This depends on the code ordering of the partitionToDeletedFiles and partitionToAppendedFiles. Instead we can remap the keys for both at once and avoid all these back and forth lookups.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. again, we create HoodieRecords for metadata table within forEach, and so couldn't do it outside. |
||
| filesAdded = Option.of(partitionToAppendedFiles.remove(partitionName)); | ||
| } | ||
|
|
||
| HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partition, filesAdded, | ||
| Option.of(new ArrayList<>(deletedFiles))); | ||
| records.add(record); | ||
| }); | ||
|
|
||
| partitionToAppendedFiles.forEach((partition, appendedFileMap) -> { | ||
| partitionToAppendedFiles.forEach((partitionName, appendedFileMap) -> { | ||
| final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionName; | ||
| fileChangeCount[1] += appendedFileMap.size(); | ||
|
|
||
| // Validate that no appended file has been deleted | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -367,7 +367,9 @@ public static List<Path> getPartitionPaths(Path basePath) throws IOException { | |
| if (Files.notExists(basePath)) { | ||
| return Collections.emptyList(); | ||
| } | ||
| return Files.list(basePath).filter(entry -> !entry.getFileName().toString().equals(HoodieTableMetaClient.METAFOLDER_NAME)).collect(Collectors.toList()); | ||
| return Files.list(basePath).filter(entry -> (!entry.getFileName().toString().equals(HoodieTableMetaClient.METAFOLDER_NAME) | ||
| && !entry.getFileName().toString().contains("parquet") && !entry.getFileName().toString().contains("log")) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no hardcoding of parquet. lets please replace with standard helpers or constants |
||
| && !entry.getFileName().toString().endsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)).collect(Collectors.toList()); | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -602,7 +602,7 @@ public Path getPartitionPath(String partition) { | |
| } | ||
|
|
||
| public List<java.nio.file.Path> getAllPartitionPaths() throws IOException { | ||
| java.nio.file.Path basePathPath = Paths.get(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME).getParent().getParent(); | ||
| java.nio.file.Path basePathPath = Paths.get(basePath); | ||
| return FileCreateUtils.getPartitionPaths(basePathPath); | ||
| } | ||
|
|
||
|
|
@@ -660,8 +660,10 @@ public FileStatus[] listAllFilesInPartition(String partitionPath) throws IOExcep | |
| return FileSystemTestUtils.listRecursive(fs, new Path(Paths.get(basePath, partitionPath).toString())).stream() | ||
| .filter(entry -> { | ||
| boolean toReturn = true; | ||
| String filePath = entry.getPath().toString(); | ||
| String fileName = entry.getPath().getName(); | ||
| if (fileName.equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) { | ||
| if (fileName.equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE) || (!fileName.contains("log") && !fileName.contains("parquet")) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
| || filePath.contains("metadata")) { | ||
| toReturn = false; | ||
| } else { | ||
| for (String inflight : inflightCommits) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pull this check into lambda
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This has already been refactored into a util method.