|
29 | 29 | import org.apache.hudi.common.table.timeline.HoodieInstant;
|
30 | 30 | import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
|
31 | 31 | import org.apache.hudi.common.util.Option;
|
| 32 | +import org.apache.hudi.common.util.collection.ImmutablePair; |
32 | 33 | import org.apache.hudi.common.util.collection.Pair;
|
33 | 34 | import org.apache.hudi.exception.HoodieException;
|
34 | 35 | import org.apache.hudi.exception.HoodieIOException;
|
|
49 | 50 | import java.io.File;
|
50 | 51 | import java.io.FileNotFoundException;
|
51 | 52 | import java.io.IOException;
|
| 53 | +import java.io.Serializable; |
52 | 54 | import java.util.ArrayList;
|
53 | 55 | import java.util.Arrays;
|
| 56 | +import java.util.HashMap; |
54 | 57 | import java.util.HashSet;
|
55 | 58 | import java.util.List;
|
56 | 59 | import java.util.Map;
|
|
59 | 62 | import java.util.Set;
|
60 | 63 | import java.util.UUID;
|
61 | 64 | import java.util.function.Function;
|
| 65 | +import java.util.function.Predicate; |
62 | 66 | import java.util.regex.Matcher;
|
63 | 67 | import java.util.regex.Pattern;
|
64 | 68 | import java.util.stream.Collectors;
|
@@ -612,4 +616,87 @@ public static List<FileStatus> getGlobStatusExcludingMetaFolder(FileSystem fs, P
|
612 | 616 | .filter(fileStatus -> !fileStatus.getPath().toString().contains(HoodieTableMetaClient.METAFOLDER_NAME))
|
613 | 617 | .collect(Collectors.toList());
|
614 | 618 | }
|
| 619 | + |
| 620 | + /** |
| 621 | + * Deletes a directory by deleting sub-paths in parallel on the file system. |
| 622 | + * |
| 623 | + * @param hoodieEngineContext {@code HoodieEngineContext} instance |
| 624 | + * @param fs file system |
| 625 | + * @param dirPath directory path |
| 626 | + * @param parallelism parallelism to use for sub-paths |
| 627 | + * @return {@code true} if the directory is delete; {@code false} otherwise. |
| 628 | + */ |
| 629 | + public static boolean deleteDir( |
| 630 | + HoodieEngineContext hoodieEngineContext, FileSystem fs, Path dirPath, int parallelism) { |
| 631 | + try { |
| 632 | + if (fs.exists(dirPath)) { |
| 633 | + FSUtils.parallelizeSubPathProcess(hoodieEngineContext, fs, dirPath, parallelism, e -> true, |
| 634 | + pairOfSubPathAndConf -> deleteSubPath( |
| 635 | + pairOfSubPathAndConf.getKey(), pairOfSubPathAndConf.getValue(), true) |
| 636 | + ); |
| 637 | + boolean result = fs.delete(dirPath, false); |
| 638 | + LOG.info("Removed directory at " + dirPath); |
| 639 | + return result; |
| 640 | + } |
| 641 | + } catch (IOException ioe) { |
| 642 | + throw new HoodieIOException(ioe.getMessage(), ioe); |
| 643 | + } |
| 644 | + return false; |
| 645 | + } |
| 646 | + |
| 647 | + /** |
| 648 | + * Processes sub-path in parallel. |
| 649 | + * |
| 650 | + * @param hoodieEngineContext {@code HoodieEngineContext} instance |
| 651 | + * @param fs file system |
| 652 | + * @param dirPath directory path |
| 653 | + * @param parallelism parallelism to use for sub-paths |
| 654 | + * @param subPathPredicate predicate to use to filter sub-paths for processing |
| 655 | + * @param pairFunction actual processing logic for each sub-path |
| 656 | + * @param <T> type of result to return for each sub-path |
| 657 | + * @return a map of sub-path to result of the processing |
| 658 | + */ |
| 659 | + public static <T> Map<String, T> parallelizeSubPathProcess( |
| 660 | + HoodieEngineContext hoodieEngineContext, FileSystem fs, Path dirPath, int parallelism, |
| 661 | + Predicate<FileStatus> subPathPredicate, SerializableFunction<Pair<String, SerializableConfiguration>, T> pairFunction) { |
| 662 | + Map<String, T> result = new HashMap<>(); |
| 663 | + try { |
| 664 | + FileStatus[] fileStatuses = fs.listStatus(dirPath); |
| 665 | + List<String> subPaths = Arrays.stream(fileStatuses) |
| 666 | + .filter(subPathPredicate) |
| 667 | + .map(fileStatus -> fileStatus.getPath().toString()) |
| 668 | + .collect(Collectors.toList()); |
| 669 | + if (subPaths.size() > 0) { |
| 670 | + SerializableConfiguration conf = new SerializableConfiguration(fs.getConf()); |
| 671 | + int actualParallelism = Math.min(subPaths.size(), parallelism); |
| 672 | + result = hoodieEngineContext.mapToPair(subPaths, |
| 673 | + subPath -> new ImmutablePair<>(subPath, pairFunction.apply(new ImmutablePair<>(subPath, conf))), |
| 674 | + actualParallelism); |
| 675 | + } |
| 676 | + } catch (IOException ioe) { |
| 677 | + throw new HoodieIOException(ioe.getMessage(), ioe); |
| 678 | + } |
| 679 | + return result; |
| 680 | + } |
| 681 | + |
| 682 | + /** |
| 683 | + * Deletes a sub-path. |
| 684 | + * |
| 685 | + * @param subPathStr sub-path String |
| 686 | + * @param conf serializable config |
| 687 | + * @param recursive is recursive or not |
| 688 | + * @return {@code true} if the sub-path is deleted; {@code false} otherwise. |
| 689 | + */ |
| 690 | + public static boolean deleteSubPath(String subPathStr, SerializableConfiguration conf, boolean recursive) { |
| 691 | + try { |
| 692 | + Path subPath = new Path(subPathStr); |
| 693 | + FileSystem fileSystem = subPath.getFileSystem(conf.get()); |
| 694 | + return fileSystem.delete(subPath, recursive); |
| 695 | + } catch (IOException e) { |
| 696 | + throw new HoodieIOException(e.getMessage(), e); |
| 697 | + } |
| 698 | + } |
| 699 | + |
/**
 * A {@link Function} that is also {@link Serializable}, so that lambdas and method references
 * assigned to it can be serialized.
 *
 * @param <T> type of the input to the function
 * @param <R> type of the result of the function
 */
@FunctionalInterface
public interface SerializableFunction<T, R> extends Function<T, R>, Serializable {
}
615 | 702 | }
|
0 commit comments