From ed0111978869c8646f8801024e3a1be89a75d23e Mon Sep 17 00:00:00 2001 From: welin Date: Tue, 29 Oct 2024 21:07:55 -0700 Subject: [PATCH 1/9] Compare with the current time in UTC --- .../trash/TimeBasedSnapshotCleanupPolicy.java | 16 ++++ .../TimeBasedSnapshotCleanupPolicyTest.java | 94 +++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java index 54c084b899e..a0c38494eac 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java @@ -21,6 +21,8 @@ import org.apache.hadoop.fs.FileStatus; import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.Duration; /** @@ -41,6 +43,20 @@ public TimeBasedSnapshotCleanupPolicy(Properties props) { @Override public boolean shouldDeleteSnapshot(FileStatus snapshot, Trash trash) { DateTime snapshotTime = Trash.TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(snapshot.getPath().getName()); + System.out.println("Parsed time is " + snapshotTime + " and the timezone is " + snapshotTime.getZone()); + System.out.println("Target clean up time is " + snapshotTime.plusMinutes(this.retentionMinutes)); + System.out.println("Current time is " + new DateTime() + " and the timezone is " + new DateTime().getZone()); + + DateTime now = new DateTime().withZone(DateTimeZone.UTC).minusHours(7); // mimic the time in azkaban + DateTime targetCleanupTime = snapshotTime.plusMinutes(this.retentionMinutes); + DateTime delta = targetCleanupTime.minus(now.getMillis()); + + Duration duration = new Duration(now, targetCleanupTime); + duration.toStandardHours(); + duration.toStandardMinutes(); + System.out.println("Time delta is " + duration.toStandardHours() + " hours and " + duration.toStandardMinutes() + " minutes"); + + return snapshotTime.plusMinutes(this.retentionMinutes).isBeforeNow(); } } diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java new file mode 100644 index 00000000000..3e8236ba7ae --- /dev/null +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.data.management.trash; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.testng.Assert; +import org.testng.annotations.Test; +import org.testng.annotations.BeforeMethod; + +import java.io.IOException; +import java.util.Properties; + +public class TimeBasedSnapshotCleanupPolicyTest { + + private MockTimeBasedSnapshotCleanupPolicy cleanupPolicy; + + @BeforeMethod + public void setUp() { + // Initialize the cleanup policy with a retention period (e.g., 1 day) + Properties properties = new Properties(); + properties.setProperty(MockTimeBasedSnapshotCleanupPolicy.SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, "1440"); // 1440 minutes = 1 day + // Mock the cutoff time to be 2024-10-30 01:01:00 UTC + cleanupPolicy = new MockTimeBasedSnapshotCleanupPolicy(properties, new DateTime(2024, 10, 30, 1, 1, DateTimeZone.UTC)); + } + + @Test + public void testShouldDeleteSnapshot() throws IOException { + + // Create a Trash + TrashTestBase trashTestBase = new TrashTestBase(new Properties()); + Trash trash = trashTestBase.trash; + + // Create dummy paths + FileStatus fs1 = new FileStatus(0, true, 0, 0, 0, + new Path(trash.getTrashLocation(), new DateTime(2024, 10, 29, 1, 0, DateTimeZone.UTC).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER))); + FileStatus fs2 = new FileStatus(0, true, 0, 0, 0, + new Path(trash.getTrashLocation(), new DateTime(2024, 10, 29, 2, 0, DateTimeZone.UTC).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER))); + + // Test old snapshot (should be deleted) + // 2024-10-29 01:00:00 UTC + 1440 minutes = 2024-10-30 01:00:00 UTC < Cutoff time a.k.a system current_time (2024-10-30 01:01:00 UTC) + Assert.assertTrue(cleanupPolicy.shouldDeleteSnapshot(fs1, trash), "Old snapshot should be deleted"); + + // Test snapshot (should not be deleted) + // 2024-10-29 02:00:00 UTC + 1440 minutes = 2024-10-30 02:00:00 UTC > Cutoff time a.k.a system current_time (2024-10-30 01:01:00 UTC) + Assert.assertFalse(cleanupPolicy.shouldDeleteSnapshot(fs2, trash), "snapshot should not be deleted"); + } + + /** + * Mock the TimeBasedSnapshotCleanupPolicy for testing purposes + * + * In this class, the current time used in the comparison method isBefore() can be mocked + * Why? The current time is used to determine if a snapshot is older than the retention period, + * and given that the current time is always changing, it is difficult to test the method shouldDeleteSnapshot() + */ + public class MockTimeBasedSnapshotCleanupPolicy implements SnapshotCleanupPolicy { + + public static final String SNAPSHOT_RETENTION_POLICY_MINUTES_KEY = "gobblin.trash.snapshot.retention.minutes"; + public static final int SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT = 1440; // one day + + private final int retentionMinutes; + private final DateTime MOCK_CURRENT_TIME; + + public MockTimeBasedSnapshotCleanupPolicy(Properties props, DateTime mockCurrentTime) { + this.retentionMinutes = Integer.parseInt(props.getProperty(SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, + Integer.toString(SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT))); + this.MOCK_CURRENT_TIME = mockCurrentTime; + } + + @Override + public boolean shouldDeleteSnapshot(FileStatus snapshot, Trash trash) { + DateTime snapshotTime = Trash.TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(snapshot.getPath().getName()); + return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(this.MOCK_CURRENT_TIME); + } + } + +} \ No newline at end of file From 6bfb281281ac382c1abbbaeaac04d687b1c5c512 Mon Sep 17 00:00:00 2001 From: welin Date: Tue, 29 Oct 2024 21:25:07 -0700 Subject: [PATCH 2/9] clean up code --- .../trash/TimeBasedSnapshotCleanupPolicy.java | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java index a0c38494eac..e3a999beed6 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java @@ -22,7 +22,6 @@ import org.apache.hadoop.fs.FileStatus; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; -import org.joda.time.Duration; /** @@ -43,20 +42,7 @@ public TimeBasedSnapshotCleanupPolicy(Properties props) { @Override public boolean shouldDeleteSnapshot(FileStatus snapshot, Trash trash) { DateTime snapshotTime = Trash.TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(snapshot.getPath().getName()); - System.out.println("Parsed time is " + snapshotTime + " and the timezone is " + snapshotTime.getZone()); - System.out.println("Target clean up time is " + snapshotTime.plusMinutes(this.retentionMinutes)); - System.out.println("Current time is " + new DateTime() + " and the timezone is " + new DateTime().getZone()); - - DateTime now = new DateTime().withZone(DateTimeZone.UTC).minusHours(7); // mimic the time in azkaban - DateTime targetCleanupTime = snapshotTime.plusMinutes(this.retentionMinutes); - DateTime delta = targetCleanupTime.minus(now.getMillis()); - - Duration duration = new Duration(now, targetCleanupTime); - duration.toStandardHours(); - duration.toStandardMinutes(); - System.out.println("Time delta is " + duration.toStandardHours() + " hours and " + duration.toStandardMinutes() + " minutes"); - - - return snapshotTime.plusMinutes(this.retentionMinutes).isBeforeNow(); + // To ensure that the comparison between snapshotTime and the current time is done in the same time zone + return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(DateTime.now(DateTimeZone.UTC)); } } From e864d80876849849196d9f07cf3f7514de01b0d1 Mon Sep 17 00:00:00 2001 From: welin Date: Wed, 30 Oct 2024 11:17:22 -0700 Subject: [PATCH 3/9] address comments --- .../trash/TimeBasedSnapshotCleanupPolicy.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java index e3a999beed6..06dcdbd7934 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java @@ -31,18 +31,30 @@ public class TimeBasedSnapshotCleanupPolicy implements SnapshotCleanupPolicy { public static final String SNAPSHOT_RETENTION_POLICY_MINUTES_KEY = "gobblin.trash.snapshot.retention.minutes"; public static final int SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT = 1440; // one day + public static final String USE_UTC_COMPARISON_KEY = "gobblin.trash.snapshot.retention.comparison.useUTC"; + public static final boolean USE_UTC_COMPARISON_DEFAULT = false; private final int retentionMinutes; + private final boolean useUTCComparison; public TimeBasedSnapshotCleanupPolicy(Properties props) { this.retentionMinutes = Integer.parseInt(props.getProperty(SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, Integer.toString(SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT))); + + this.useUTCComparison = Boolean.parseBoolean(props.getProperty(USE_UTC_COMPARISON_KEY, + Boolean.toString(USE_UTC_COMPARISON_DEFAULT))); } @Override public boolean shouldDeleteSnapshot(FileStatus snapshot, Trash trash) { DateTime snapshotTime = Trash.TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(snapshot.getPath().getName()); - // To ensure that the comparison between snapshotTime and the current time is done in the same time zone + + if (this.useUTCComparison) { + // Ensure that the comparison between snapshotTime and the current time is done in the same time zone (UTC) return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(DateTime.now(DateTimeZone.UTC)); + } else { + // Default to use system time zone + return snapshotTime.plusMinutes(this.retentionMinutes).isBeforeNow(); + } } } From 0ab6daa778eae17e9ad9e77c9dcb543f8e578c4b Mon Sep 17 00:00:00 2001 From: welin Date: Wed, 30 Oct 2024 12:47:49 -0700 Subject: [PATCH 4/9] update unit tests --- .../TimeBasedSnapshotCleanupPolicyTest.java | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java index 3e8236ba7ae..e7acd717421 100644 --- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java @@ -26,6 +26,7 @@ import org.testng.annotations.BeforeMethod; import java.io.IOException; +import java.sql.Date; import java.util.Properties; public class TimeBasedSnapshotCleanupPolicyTest { @@ -37,8 +38,9 @@ public void setUp() { // Initialize the cleanup policy with a retention period (e.g., 1 day) Properties properties = new Properties(); properties.setProperty(MockTimeBasedSnapshotCleanupPolicy.SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, "1440"); // 1440 minutes = 1 day + properties.setProperty(MockTimeBasedSnapshotCleanupPolicy.USE_UTC_COMPARISON_KEY, "true"); // Mock the cutoff time to be 2024-10-30 01:01:00 UTC - cleanupPolicy = new MockTimeBasedSnapshotCleanupPolicy(properties, new DateTime(2024, 10, 30, 1, 1, DateTimeZone.UTC)); + cleanupPolicy = new MockTimeBasedSnapshotCleanupPolicy(properties, new DateTime(2024, 10, 30, 1, 1)); } @Test @@ -74,20 +76,32 @@ public class MockTimeBasedSnapshotCleanupPolicy implements SnapshotCleanupPolicy public static final String SNAPSHOT_RETENTION_POLICY_MINUTES_KEY = "gobblin.trash.snapshot.retention.minutes"; public static final int SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT = 1440; // one day + public static final String USE_UTC_COMPARISON_KEY = "gobblin.trash.snapshot.retention.comparison.useUTC"; + public static final boolean USE_UTC_COMPARISON_DEFAULT = false; private final int retentionMinutes; - private final DateTime MOCK_CURRENT_TIME; + private final boolean useUTCComparison; + private final DateTime MOCK_CURRENT_TIME; public MockTimeBasedSnapshotCleanupPolicy(Properties props, DateTime mockCurrentTime) { this.retentionMinutes = Integer.parseInt(props.getProperty(SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, Integer.toString(SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT))); + this.useUTCComparison = Boolean.parseBoolean(props.getProperty(USE_UTC_COMPARISON_KEY, + Boolean.toString(USE_UTC_COMPARISON_DEFAULT))); this.MOCK_CURRENT_TIME = mockCurrentTime; } @Override public boolean shouldDeleteSnapshot(FileStatus snapshot, Trash trash) { DateTime snapshotTime = Trash.TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(snapshot.getPath().getName()); - return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(this.MOCK_CURRENT_TIME); + + if (this.useUTCComparison) { + // Ensure that the comparison between snapshotTime and the current time is done in the same time zone (UTC) + return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(this.MOCK_CURRENT_TIME.withZoneRetainFields(DateTimeZone.UTC)); + } else { + // Default to use system time zone + return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(this.MOCK_CURRENT_TIME); + } } } From 3ead802b272f41ebf43964117155881b8a42cc3b Mon Sep 17 00:00:00 2001 From: welin Date: Fri, 8 Nov 2024 23:58:46 -0800 Subject: [PATCH 5/9] address comments --- .../trash/TimeBasedSnapshotCleanupPolicy.java | 19 +++++------------- .../TimeBasedSnapshotCleanupPolicyTest.java | 20 +++++-------------- 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java index 06dcdbd7934..f9a90f377f3 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java @@ -31,30 +31,21 @@ public class TimeBasedSnapshotCleanupPolicy implements SnapshotCleanupPolicy { public static final String SNAPSHOT_RETENTION_POLICY_MINUTES_KEY = "gobblin.trash.snapshot.retention.minutes"; public static final int SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT = 1440; // one day - public static final String USE_UTC_COMPARISON_KEY = "gobblin.trash.snapshot.retention.comparison.useUTC"; - public static final boolean USE_UTC_COMPARISON_DEFAULT = false; + public static final String RETENTION_SNAPSHOT_TIMEZONE = "gobblin.trash.snapshot.retention.timezone"; private final int retentionMinutes; - private final boolean useUTCComparison; + private final DateTimeZone retentionSnapshotTimezone; public TimeBasedSnapshotCleanupPolicy(Properties props) { this.retentionMinutes = Integer.parseInt(props.getProperty(SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, Integer.toString(SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT))); - - this.useUTCComparison = Boolean.parseBoolean(props.getProperty(USE_UTC_COMPARISON_KEY, - Boolean.toString(USE_UTC_COMPARISON_DEFAULT))); + this.retentionSnapshotTimezone = props.containsKey(RETENTION_SNAPSHOT_TIMEZONE) ? DateTimeZone.forID(props.getProperty(RETENTION_SNAPSHOT_TIMEZONE)) : DateTimeZone.getDefault(); } @Override public boolean shouldDeleteSnapshot(FileStatus snapshot, Trash trash) { DateTime snapshotTime = Trash.TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(snapshot.getPath().getName()); - - if (this.useUTCComparison) { - // Ensure that the comparison between snapshotTime and the current time is done in the same time zone (UTC) - return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(DateTime.now(DateTimeZone.UTC)); - } else { - // Default to use system time zone - return snapshotTime.plusMinutes(this.retentionMinutes).isBeforeNow(); - } + // Ensure that the comparison between snapshotTime and the current time can be done in the same time zone (UTC) with timezone specified in the propertie (Default to system time zone) + return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(DateTime.now(this.retentionSnapshotTimezone)); } } diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java index e7acd717421..0621cdc2382 100644 --- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java @@ -26,7 +26,6 @@ import org.testng.annotations.BeforeMethod; import java.io.IOException; -import java.sql.Date; import java.util.Properties; public class TimeBasedSnapshotCleanupPolicyTest { @@ -38,7 +37,7 @@ public void setUp() { // Initialize the cleanup policy with a retention period (e.g., 1 day) Properties properties = new Properties(); properties.setProperty(MockTimeBasedSnapshotCleanupPolicy.SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, "1440"); // 1440 minutes = 1 day - properties.setProperty(MockTimeBasedSnapshotCleanupPolicy.USE_UTC_COMPARISON_KEY, "true"); + properties.setProperty(MockTimeBasedSnapshotCleanupPolicy.RETENTION_SNAPSHOT_TIMEZONE, "UTC"); // Mock the cutoff time to be 2024-10-30 01:01:00 UTC cleanupPolicy = new MockTimeBasedSnapshotCleanupPolicy(properties, new DateTime(2024, 10, 30, 1, 1)); } @@ -76,32 +75,23 @@ public class MockTimeBasedSnapshotCleanupPolicy implements SnapshotCleanupPolicy public static final String SNAPSHOT_RETENTION_POLICY_MINUTES_KEY = "gobblin.trash.snapshot.retention.minutes"; public static final int SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT = 1440; // one day - public static final String USE_UTC_COMPARISON_KEY = "gobblin.trash.snapshot.retention.comparison.useUTC"; - public static final boolean USE_UTC_COMPARISON_DEFAULT = false; + public static final String RETENTION_SNAPSHOT_TIMEZONE = "gobblin.trash.snapshot.retention.timezone"; private final int retentionMinutes; - private final boolean useUTCComparison; private final DateTime MOCK_CURRENT_TIME; + private final DateTimeZone retentionSnapshotTimezone; public MockTimeBasedSnapshotCleanupPolicy(Properties props, DateTime mockCurrentTime) { this.retentionMinutes = Integer.parseInt(props.getProperty(SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, Integer.toString(SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT))); - this.useUTCComparison = Boolean.parseBoolean(props.getProperty(USE_UTC_COMPARISON_KEY, - Boolean.toString(USE_UTC_COMPARISON_DEFAULT))); this.MOCK_CURRENT_TIME = mockCurrentTime; + this.retentionSnapshotTimezone = props.containsKey(RETENTION_SNAPSHOT_TIMEZONE) ? DateTimeZone.forID(props.getProperty(RETENTION_SNAPSHOT_TIMEZONE)) : DateTimeZone.getDefault(); } @Override public boolean shouldDeleteSnapshot(FileStatus snapshot, Trash trash) { DateTime snapshotTime = Trash.TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(snapshot.getPath().getName()); - - if (this.useUTCComparison) { - // Ensure that the comparison between snapshotTime and the current time is done in the same time zone (UTC) - return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(this.MOCK_CURRENT_TIME.withZoneRetainFields(DateTimeZone.UTC)); - } else { - // Default to use system time zone - return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(this.MOCK_CURRENT_TIME); - } + return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(this.MOCK_CURRENT_TIME.withZoneRetainFields(this.retentionSnapshotTimezone)); } } From c5c4892475da2107fe5096db1331f033def2721c Mon Sep 17 00:00:00 2001 From: welin Date: Wed, 13 Nov 2024 13:24:19 -0800 Subject: [PATCH 6/9] address comment --- .../management/trash/TimeBasedSnapshotCleanupPolicyTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java index 0621cdc2382..7d052fe2828 100644 --- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java @@ -71,7 +71,7 @@ public void testShouldDeleteSnapshot() throws IOException { * Why? The current time is used to determine if a snapshot is older than the retention period, * and given that the current time is always changing, it is difficult to test the method shouldDeleteSnapshot() */ - public class MockTimeBasedSnapshotCleanupPolicy implements SnapshotCleanupPolicy { + public class MockTimeBasedSnapshotCleanupPolicy extends TimeBasedSnapshotCleanupPolicy { public static final String SNAPSHOT_RETENTION_POLICY_MINUTES_KEY = "gobblin.trash.snapshot.retention.minutes"; public static final int SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT = 1440; // one day @@ -82,6 +82,7 @@ public class MockTimeBasedSnapshotCleanupPolicy implements SnapshotCleanupPolicy private final DateTimeZone retentionSnapshotTimezone; public MockTimeBasedSnapshotCleanupPolicy(Properties props, DateTime mockCurrentTime) { + super(props); this.retentionMinutes = Integer.parseInt(props.getProperty(SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, Integer.toString(SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT))); this.MOCK_CURRENT_TIME = mockCurrentTime; From 401fb30e1be7f53756a2d2640686c78068aaf269 Mon Sep 17 00:00:00 2001 From: welin Date: Wed, 13 Nov 2024 14:18:48 -0800 Subject: [PATCH 7/9] address comments --- .../management/trash/TimeBasedSnapshotCleanupPolicy.java | 4 ++-- .../trash/TimeBasedSnapshotCleanupPolicyTest.java | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java index f9a90f377f3..3d2da00f30a 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java @@ -33,8 +33,8 @@ public class TimeBasedSnapshotCleanupPolicy implements SnapshotCleanupPolicy { public static final int SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT = 1440; // one day public static final String RETENTION_SNAPSHOT_TIMEZONE = "gobblin.trash.snapshot.retention.timezone"; - private final int retentionMinutes; - private final DateTimeZone retentionSnapshotTimezone; + protected final int retentionMinutes; + protected final DateTimeZone retentionSnapshotTimezone; public TimeBasedSnapshotCleanupPolicy(Properties props) { this.retentionMinutes = Integer.parseInt(props.getProperty(SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java index 7d052fe2828..b0ce780f64e 100644 --- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java @@ -73,20 +73,11 @@ public void testShouldDeleteSnapshot() throws IOException { */ public class MockTimeBasedSnapshotCleanupPolicy extends TimeBasedSnapshotCleanupPolicy { - public static final String SNAPSHOT_RETENTION_POLICY_MINUTES_KEY = "gobblin.trash.snapshot.retention.minutes"; - public static final int SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT = 1440; // one day - public static final String RETENTION_SNAPSHOT_TIMEZONE = "gobblin.trash.snapshot.retention.timezone"; - - private final int retentionMinutes; private final DateTime MOCK_CURRENT_TIME; - private final DateTimeZone retentionSnapshotTimezone; public MockTimeBasedSnapshotCleanupPolicy(Properties props, DateTime mockCurrentTime) { super(props); - this.retentionMinutes = Integer.parseInt(props.getProperty(SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, - Integer.toString(SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT))); this.MOCK_CURRENT_TIME = mockCurrentTime; - this.retentionSnapshotTimezone = props.containsKey(RETENTION_SNAPSHOT_TIMEZONE) ? DateTimeZone.forID(props.getProperty(RETENTION_SNAPSHOT_TIMEZONE)) : DateTimeZone.getDefault(); } @Override From f749ab97f6a4f16abfca58f39506a9c28f60867c Mon Sep 17 00:00:00 2001 From: welin Date: Wed, 20 Nov 2024 14:37:34 -0800 Subject: [PATCH 8/9] Fix bug where azkaban prop.contains(object) is checking value not key as intended --- .../trash/TimeBasedSnapshotCleanupPolicy.java | 11 +--- .../gobblin/data/management/trash/Trash.java | 2 +- .../TimeBasedSnapshotCleanupPolicyTest.java | 57 ++++++------------- .../data/management/trash/TrashTest.java | 23 ++++++++ 4 files changed, 43 insertions(+), 50 deletions(-) diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java index 3d2da00f30a..8e343ea1432 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicy.java @@ -21,7 +21,6 @@ import org.apache.hadoop.fs.FileStatus; import org.joda.time.DateTime; -import org.joda.time.DateTimeZone; /** @@ -31,21 +30,17 @@ public class TimeBasedSnapshotCleanupPolicy implements SnapshotCleanupPolicy { public static final String SNAPSHOT_RETENTION_POLICY_MINUTES_KEY = "gobblin.trash.snapshot.retention.minutes"; public static final int SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT = 1440; // one day - public static final String RETENTION_SNAPSHOT_TIMEZONE = "gobblin.trash.snapshot.retention.timezone"; - protected final int retentionMinutes; - protected final DateTimeZone retentionSnapshotTimezone; + private final int retentionMinutes; public TimeBasedSnapshotCleanupPolicy(Properties props) { this.retentionMinutes = Integer.parseInt(props.getProperty(SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, Integer.toString(SNAPSHOT_RETENTION_POLICY_MINUTES_DEFAULT))); - this.retentionSnapshotTimezone = props.containsKey(RETENTION_SNAPSHOT_TIMEZONE) ? DateTimeZone.forID(props.getProperty(RETENTION_SNAPSHOT_TIMEZONE)) : DateTimeZone.getDefault(); } @Override public boolean shouldDeleteSnapshot(FileStatus snapshot, Trash trash) { DateTime snapshotTime = Trash.TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(snapshot.getPath().getName()); - // Ensure that the comparison between snapshotTime and the current time can be done in the same time zone (UTC) with timezone specified in the propertie (Default to system time zone) - return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(DateTime.now(this.retentionSnapshotTimezone)); + return snapshotTime.plusMinutes(this.retentionMinutes).isBeforeNow(); } -} +} \ No newline at end of file diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/Trash.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/Trash.java index 48c85b6393a..1ec4863c18c 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/Trash.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/trash/Trash.java @@ -178,7 +178,7 @@ protected Trash(FileSystem fs, Properties props, String user) throws IOException } public static Trash getTrash(FileSystem fs, Properties props, String user) throws IOException { - if (props.contains(TRASH_CLASS_KEY)) { + if (props.containsKey(TRASH_CLASS_KEY)) { return GobblinConstructorUtils.invokeConstructor(Trash.class, props.getProperty(TRASH_CLASS_KEY), fs, props, user); } else { return new Trash(fs, props, user); diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java index b0ce780f64e..7c6dfd0a387 100644 --- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java @@ -30,16 +30,15 @@ public class TimeBasedSnapshotCleanupPolicyTest { - private MockTimeBasedSnapshotCleanupPolicy cleanupPolicy; + private TimeBasedSnapshotCleanupPolicy cleanupPolicy; @BeforeMethod public void setUp() { - // Initialize the cleanup policy with a retention period (e.g., 1 day) + // Initialize the cleanup policy with a retention period of 600 minutes (10 hours) Properties properties = new Properties(); - properties.setProperty(MockTimeBasedSnapshotCleanupPolicy.SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, "1440"); // 1440 minutes = 1 day - properties.setProperty(MockTimeBasedSnapshotCleanupPolicy.RETENTION_SNAPSHOT_TIMEZONE, "UTC"); - // Mock the cutoff time to be 2024-10-30 01:01:00 UTC - cleanupPolicy = new MockTimeBasedSnapshotCleanupPolicy(properties, new DateTime(2024, 10, 30, 1, 1)); + properties.setProperty(TimeBasedSnapshotCleanupPolicy.SNAPSHOT_RETENTION_POLICY_MINUTES_KEY, "600"); // 600 minutes = 10 hours + cleanupPolicy = new TimeBasedSnapshotCleanupPolicy(properties); + } @Test @@ -49,42 +48,18 @@ public void testShouldDeleteSnapshot() throws IOException { TrashTestBase trashTestBase = new TrashTestBase(new Properties()); Trash trash = trashTestBase.trash; - // Create dummy paths - FileStatus fs1 = new FileStatus(0, true, 0, 0, 0, - new Path(trash.getTrashLocation(), new DateTime(2024, 10, 29, 1, 0, DateTimeZone.UTC).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER))); - FileStatus fs2 = new FileStatus(0, true, 0, 0, 0, - new Path(trash.getTrashLocation(), new DateTime(2024, 10, 29, 2, 0, DateTimeZone.UTC).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER))); + // Get the current time + DateTime now = DateTime.now(DateTimeZone.UTC); - // Test old snapshot (should be deleted) - // 2024-10-29 01:00:00 UTC + 1440 minutes = 2024-10-30 01:00:00 UTC < Cutoff time a.k.a system current_time (2024-10-30 01:01:00 UTC) - Assert.assertTrue(cleanupPolicy.shouldDeleteSnapshot(fs1, trash), "Old snapshot should be deleted"); + // Create dummy paths with timestamps between 11 and 9 hours ago in UTC + FileStatus fs1 = new FileStatus(0, true, 0, 0, 0, + new Path(trash.getTrashLocation(), now.minusHours(11).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER))); + FileStatus fs2 = new FileStatus(0, true, 0, 0, 0, + new Path(trash.getTrashLocation(), now.minusHours(9).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER))); - // Test snapshot (should not be deleted) - // 2024-10-29 02:00:00 UTC + 1440 minutes = 2024-10-30 02:00:00 UTC > Cutoff time a.k.a system current_time (2024-10-30 01:01:00 UTC) - Assert.assertFalse(cleanupPolicy.shouldDeleteSnapshot(fs2, trash), "snapshot should not be deleted"); + // Test snapshot (should be deleted) + Assert.assertTrue(cleanupPolicy.shouldDeleteSnapshot(fs1, trash),"Snapshot should be deleted"); + // Test snapshot (should NOT be deleted) + Assert.assertFalse(cleanupPolicy.shouldDeleteSnapshot(fs2, trash),"Snapshot should NOT be deleted"); } - - /** - * Mock the TimeBasedSnapshotCleanupPolicy for testing purposes - * - * In this class, the current time used in the comparison method isBefore() can be mocked - * Why? The current time is used to determine if a snapshot is older than the retention period, - * and given that the current time is always changing, it is difficult to test the method shouldDeleteSnapshot() - */ - public class MockTimeBasedSnapshotCleanupPolicy extends TimeBasedSnapshotCleanupPolicy { - - private final DateTime MOCK_CURRENT_TIME; - - public MockTimeBasedSnapshotCleanupPolicy(Properties props, DateTime mockCurrentTime) { - super(props); - this.MOCK_CURRENT_TIME = mockCurrentTime; - } - - @Override - public boolean shouldDeleteSnapshot(FileStatus snapshot, Trash trash) { - DateTime snapshotTime = Trash.TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(snapshot.getPath().getName()); - return snapshotTime.plusMinutes(this.retentionMinutes).isBefore(this.MOCK_CURRENT_TIME.withZoneRetainFields(this.retentionSnapshotTimezone)); - } - } - } \ No newline at end of file diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TrashTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TrashTest.java index 3372533cae9..0e6a05bd826 100644 --- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TrashTest.java +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TrashTest.java @@ -318,4 +318,27 @@ public Boolean answer(InvocationOnMock invocation) } } + /** + * Test Trash.getTrash() method to ensure that the correct Trash implementation is returned based on the properties evaluation + */ + @Test + public void testGetTrashEvanluateExpected() throws IOException { + Properties properties = new Properties(); + properties.setProperty(Trash.TRASH_CLASS_KEY, MockTrash.class.getCanonicalName()); + properties.setProperty(TrashFactory.SIMULATE, "true"); + properties.setProperty(TrashFactory.SIMULATE_USING_ACTUAL_TRASH, "true"); + properties.setProperty(Trash.TRASH_LOCATION_KEY, "/trash/dir"); + FileSystem mockFs = mock(FileSystem.class); + + Trash trash = Trash.getTrash(mockFs, properties, "testUser"); + + // Assert if condition is evaluated and the correct trash is returned + Assert.assertTrue(trash instanceof MockTrash); + + properties.remove(Trash.TRASH_CLASS_KEY); + Trash trash2 = Trash.getTrash(mockFs, properties, "testUser"); + // Assert if the trash would default to Trash if the condition is not met + Assert.assertTrue(trash2 instanceof Trash); + } + } From 5b6fa8fc336822903a5d9ee0cf0d762f74abd99d Mon Sep 17 00:00:00 2001 From: welin Date: Wed, 20 Nov 2024 14:58:10 -0800 Subject: [PATCH 9/9] Mock the FileStatus object to return the desired path --- .../TimeBasedSnapshotCleanupPolicyTest.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java index 7c6dfd0a387..f92c63a05df 100644 --- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/trash/TimeBasedSnapshotCleanupPolicyTest.java @@ -21,6 +21,7 @@ import org.apache.hadoop.fs.Path; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; +import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.Test; import org.testng.annotations.BeforeMethod; @@ -51,11 +52,16 @@ public void testShouldDeleteSnapshot() throws IOException { // Get the current time DateTime now = DateTime.now(DateTimeZone.UTC); - // Create dummy paths with timestamps between 11 and 9 hours ago in UTC - FileStatus fs1 = new FileStatus(0, true, 0, 0, 0, - new Path(trash.getTrashLocation(), now.minusHours(11).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER))); - FileStatus fs2 = new FileStatus(0, true, 0, 0, 0, - new Path(trash.getTrashLocation(), now.minusHours(9).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER))); + // Create dummy paths with timestamps between 11 and 9 hours ago + Path path1 = new Path(trash.getTrashLocation(), now.minusHours(11).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER)); + Path path2 = new Path(trash.getTrashLocation(), now.minusHours(9).toString(Trash.TRASH_SNAPSHOT_NAME_FORMATTER)); + + // Mock FileStatus to return the desired paths + FileStatus fs1 = Mockito.mock(FileStatus.class); + Mockito.when(fs1.getPath()).thenReturn(path1); + + FileStatus fs2 = Mockito.mock(FileStatus.class); + Mockito.when(fs2.getPath()).thenReturn(path2); // Test snapshot (should be deleted) Assert.assertTrue(cleanupPolicy.shouldDeleteSnapshot(fs1, trash),"Snapshot should be deleted");