From e52b94809103b6fc2bb39dedaf9f88dff133bf6f Mon Sep 17 00:00:00 2001 From: zhuanshenbsj1 Date: Wed, 7 Dec 2022 13:37:58 +0800 Subject: [PATCH 1/2] IncrementalCleaning consider later clustering --- .../org/apache/hudi/table/action/clean/CleanPlanner.java | 8 ++++++-- .../apache/hudi/common/table/timeline/HoodieInstant.java | 9 +++++++++ .../table/timeline/HoodieInstantTimeGenerator.java | 9 +++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java index 396b47ae0a30c..4aa8df4ab2eb0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java @@ -188,8 +188,12 @@ private List getPartitionPathsForIncrementalCleaning(HoodieCleanMetadata + "since last cleaned at " + cleanMetadata.getEarliestCommitToRetain() + ". New Instant to retain : " + newInstantToRetain); return hoodieTable.getCompletedCommitsTimeline().getInstantsAsStream().filter( - instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS, - cleanMetadata.getEarliestCommitToRetain()) && HoodieTimeline.compareTimestamps(instant.getTimestamp(), + instant -> (HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS, + cleanMetadata.getEarliestCommitToRetain()) + || (instant.getMarkerFileAccessTimestamp().isPresent() + && (HoodieTimeline.compareTimestamps(instant.getMarkerFileAccessTimestamp().get(), HoodieTimeline.GREATER_THAN_OR_EQUALS, + cleanMetadata.getStartCleanTime())))) + && HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN, newInstantToRetain.get().getTimestamp())).flatMap(instant -> { try { if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(instant.getAction())) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java index 0115742e07a08..fd7e704c09d7b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.table.timeline; import org.apache.hadoop.fs.FileStatus; +import org.apache.hudi.common.util.Option; import java.io.Serializable; import java.util.Comparator; @@ -73,6 +74,7 @@ public enum State { private State state = State.COMPLETED; private String action; private String timestamp; + private Option markerFileAccessTimestamp ; /** * Load the instant from the meta FileStatus. @@ -82,6 +84,7 @@ public HoodieInstant(FileStatus fileStatus) { String fileName = fileStatus.getPath().getName(); String fileExtension = getTimelineFileExtension(fileName); timestamp = fileName.replace(fileExtension, ""); + markerFileAccessTimestamp = Option.ofNullable(HoodieInstantTimeGenerator.parseTimeMillisToInstantTime(fileStatus.getAccessTime())); // Next read the action for this marker action = fileExtension.replaceFirst(".", ""); @@ -104,12 +107,14 @@ public HoodieInstant(boolean isInflight, String action, String timestamp) { this.state = isInflight ? State.INFLIGHT : State.COMPLETED; this.action = action; this.timestamp = timestamp; + this.markerFileAccessTimestamp = Option.ofNullable(null); } public HoodieInstant(State state, String action, String timestamp) { this.state = state; this.action = action; this.timestamp = timestamp; + this.markerFileAccessTimestamp = Option.ofNullable(null); } public boolean isCompleted() { @@ -132,6 +137,10 @@ public String getTimestamp() { return timestamp; } + public Option getMarkerFileAccessTimestamp(){ + return markerFileAccessTimestamp; + } + /** * Get the filename for this instant. */ diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java index e839e73669e90..abc2992ec3e1a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java @@ -81,6 +81,15 @@ public static String createNewInstantTime(long milliseconds) { }); } + public static String parseTimeMillisToInstantTime(long milliseconds){ + try { + Date d = new Date(milliseconds); + return MILLIS_INSTANT_TIME_FORMATTER.format(convertDateToTemporalAccessor(d)); + } catch (DateTimeParseException e) { + throw e; + } + } + public static Date parseDateFromInstantTime(String timestamp) throws ParseException { try { // Enables backwards compatibility with non-millisecond granularity instants From ff0887aff8930a1813f4863563e12d8b3db1b20e Mon Sep 17 00:00:00 2001 From: zhuanshenbsj1 Date: Wed, 7 Dec 2022 23:57:12 +0800 Subject: [PATCH 2/2] fix checkstyle error --- .../apache/hudi/common/table/timeline/HoodieInstant.java | 7 ++++--- .../common/table/timeline/HoodieInstantTimeGenerator.java | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java index fd7e704c09d7b..5a2081c1a7f85 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java @@ -18,9 +18,10 @@ package org.apache.hudi.common.table.timeline; -import org.apache.hadoop.fs.FileStatus; import org.apache.hudi.common.util.Option; +import org.apache.hadoop.fs.FileStatus; + import java.io.Serializable; import java.util.Comparator; import java.util.HashMap; @@ -74,7 +75,7 @@ public enum State { private State state = State.COMPLETED; private String action; private String timestamp; - private Option markerFileAccessTimestamp ; + private Option markerFileAccessTimestamp; /** * Load the instant from the meta FileStatus. @@ -137,7 +138,7 @@ public String getTimestamp() { return timestamp; } - public Option getMarkerFileAccessTimestamp(){ + public Option getMarkerFileAccessTimestamp() { return markerFileAccessTimestamp; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java index abc2992ec3e1a..03d068b190958 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java @@ -81,7 +81,7 @@ public static String createNewInstantTime(long milliseconds) { }); } - public static String parseTimeMillisToInstantTime(long milliseconds){ + public static String parseTimeMillisToInstantTime(long milliseconds) { try { Date d = new Date(milliseconds); return MILLIS_INSTANT_TIME_FORMATTER.format(convertDateToTemporalAccessor(d));