diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/version/HiveDatasetVersionCleaner.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/version/HiveDatasetVersionCleaner.java index ffcc3b3527e..1363be2e02a 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/version/HiveDatasetVersionCleaner.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/version/HiveDatasetVersionCleaner.java @@ -88,20 +88,31 @@ public void clean() throws IOException { Partition partition = hiveDatasetVersion.getPartition(); try { if (!cleanableHiveDataset.isSimulate()) { + // As part of the cleanup process, we want to delete both: hive partition and underlying hdfs files + // However, scenarios arise where hive partition is dropped, but hdfs files aren't, leading to dangling files + // Thus, we reverse the order of cleaning up hdfs files first and then drop hive partition + // In cases where HMS was unresponsive and hive partition couldn't be dropped + // re-running hive retention would drop the partition with no hdfs files found to be deleted + // or set the flag `isShouldDeleteData` to false + if (cleanableHiveDataset.isShouldDeleteData()) { + cleanableHiveDataset.getFsCleanableHelper().clean(hiveDatasetVersion, possiblyEmptyDirectories); + } client.get().dropPartition(partition.getTable().getDbName(), partition.getTable().getTableName(), partition.getValues(), false); log.info("Successfully dropped partition " + partition.getCompleteName()); } else { log.info("Simulating drop partition " + partition.getCompleteName()); } - if (cleanableHiveDataset.isShouldDeleteData()) { - cleanableHiveDataset.getFsCleanableHelper().clean(hiveDatasetVersion, possiblyEmptyDirectories); - } } catch (TException | IOException e) { log.warn(String.format("Failed to completely delete partition %s.", partition.getCompleteName()), e); throw new IOException(e); } } - cleanableHiveDataset.getFsCleanableHelper().cleanEmptyDirectories(possiblyEmptyDirectories, cleanableHiveDataset); + try { + cleanableHiveDataset.getFsCleanableHelper().cleanEmptyDirectories(possiblyEmptyDirectories, cleanableHiveDataset); + } catch (IOException ex) { + log.warn(String.format("Failed to delete at least one or more empty directories from total:{%s} with root path %s", possiblyEmptyDirectories.size(), cleanableHiveDataset.datasetRoot()), ex); + throw new IOException(ex); + } } @Override