diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java index cfc143e3d0caa..d940f3bb4577a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java @@ -615,6 +615,24 @@ public static Path getPartitionPath(Path basePath, String partitionPath) { return StringUtils.isNullOrEmpty(partitionPath) ? basePath : new Path(basePath, partitionPath); } + /** + * Extracts the file name from the relative path based on the table base path. For example: + * "/2022/07/29/file1.parquet", "/2022/07/29" -> "file1.parquet" + * "2022/07/29/file2.parquet", "2022/07/29" -> "file2.parquet" + * "/file3.parquet", "" -> "file3.parquet" + * "file4.parquet", "" -> "file4.parquet" + * + * @param filePathWithPartition the relative file path based on the table base path. + * @param partition the relative partition path. For partitioned table, `partition` contains the relative partition path; + * for non-partitioned table, `partition` is empty + * @return Extracted file name in String. + */ + public static String getFileName(String filePathWithPartition, String partition) { + int offset = StringUtils.isNullOrEmpty(partition) + ? (filePathWithPartition.startsWith("/") ? 1 : 0) : partition.length() + 1; + return filePathWithPartition.substring(offset); + } + /** * Get DFS full partition path (e.g. hdfs://ip-address:8020:/) */ diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index d41f09990e6d5..2c5b8db0edc37 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -325,16 +325,13 @@ public static List convertMetadataToFilesPartitionRecords(HoodieCo return map; } - int offset = partition.equals(NON_PARTITIONED_NAME) - ? (pathWithPartition.startsWith("/") ? 1 : 0) - : partition.length() + 1; - String filename = pathWithPartition.substring(offset); + String fileName = FSUtils.getFileName(pathWithPartition, partitionStatName); // Since write-stats are coming in no particular order, if the same // file have previously been appended to w/in the txn, we simply pick max // of the sizes as reported after every write, since file-sizes are // monotonically increasing (ie file-size never goes down, unless deleted) - map.merge(filename, stat.getFileSizeInBytes(), Math::max); + map.merge(fileName, stat.getFileSizeInBytes(), Math::max); return map; }, @@ -410,12 +407,7 @@ public static HoodieData convertMetadataToBloomFilterRecords( return Collections.emptyListIterator(); } - // For partitioned table, "partition" contains the relative partition path; - // for non-partitioned table, "partition" is empty - int offset = StringUtils.isNullOrEmpty(partition) - ? (pathWithPartition.startsWith("/") ? 1 : 0) : partition.length() + 1; - - final String fileName = pathWithPartition.substring(offset); + String fileName = FSUtils.getFileName(pathWithPartition, partition); if (!FSUtils.isBaseFile(new Path(fileName))) { return Collections.emptyListIterator(); } @@ -1162,13 +1154,8 @@ private static Stream getColumnStatsRecords(String partitionPath, HoodieTableMetaClient datasetMetaClient, List columnsToIndex, boolean isDeleted) { - String partitionName = getPartitionIdentifier(partitionPath); - // NOTE: We have to chop leading "/" to make sure Hadoop does not treat it like - // absolute path String filePartitionPath = filePath.startsWith("/") ? filePath.substring(1) : filePath; - String fileName = partitionName.equals(NON_PARTITIONED_NAME) - ? filePartitionPath - : filePartitionPath.substring(partitionName.length() + 1); + String fileName = FSUtils.getFileName(filePath, partitionPath); if (isDeleted) { // TODO we should delete records instead of stubbing them diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java index 7506e659c9254..481bb1dd452da 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java @@ -326,9 +326,17 @@ public void testFileNameRelatedFunctions() throws Exception { Files.createFile(partitionPath.resolve(log3)); assertEquals(3, (int) FSUtils.getLatestLogVersion(FSUtils.getFs(basePath, new Configuration()), - new Path(partitionPath.toString()), fileId, LOG_EXTENTION, instantTime).get().getLeft()); + new Path(partitionPath.toString()), fileId, LOG_EXTENTION, instantTime).get().getLeft()); assertEquals(4, FSUtils.computeNextLogVersion(FSUtils.getFs(basePath, new Configuration()), - new Path(partitionPath.toString()), fileId, LOG_EXTENTION, instantTime)); + new Path(partitionPath.toString()), fileId, LOG_EXTENTION, instantTime)); + } + + @Test + public void testGetFilename() { + assertEquals("file1.parquet", FSUtils.getFileName("/2022/07/29/file1.parquet", "/2022/07/29")); + assertEquals("file2.parquet", FSUtils.getFileName("2022/07/29/file2.parquet", "2022/07/29")); + assertEquals("file3.parquet", FSUtils.getFileName("/file3.parquet", "")); + assertEquals("file4.parquet", FSUtils.getFileName("file4.parquet", "")); } private void prepareTestDirectory(FileSystem fileSystem, String rootDir) throws IOException {