Merged
42 changes: 18 additions & 24 deletions core/src/main/java/org/apache/iceberg/util/SnapshotUtil.java
@@ -102,43 +102,37 @@ public static Iterable<Snapshot> ancestorsOf(long snapshotId, Function<Long, Sna
}

/**
* Traverses the history of the table's current snapshot and:
* 1. returns null, if no snapshot exists or target timestamp is more recent than the current snapshot.
* 2. else return the first snapshot which satisfies {@literal >=} targetTimestamp.
* <p>
* Given the snapshots (with timestamp): [S1 (10), S2 (11), S3 (12), S4 (14)]
* <p>
* firstSnapshotAfterTimestamp(table, x {@literal <=} 10) = S1
* firstSnapshotAfterTimestamp(table, 11) = S2
* firstSnapshotAfterTimestamp(table, 13) = S4
* firstSnapshotAfterTimestamp(table, 14) = S4
* firstSnapshotAfterTimestamp(table, x {@literal >} 14) = null
* <p>
* where x is the target timestamp in milliseconds and Si is the snapshot
* Traverses the history of the table's current snapshot and finds the first snapshot committed after the given time.
*
* @param table a table
* @param targetTimestampMillis a timestamp in milliseconds
* @return the first snapshot which satisfies {@literal >=} targetTimestamp, or null if the current snapshot is
* more recent than the target timestamp
* @param timestampMillis a timestamp in milliseconds
* @return the first snapshot after the given timestamp, or null if the current snapshot is older than the timestamp
Member
I would add the word "committed" again here and before all the "afters". I don't know why I can't handle both concepts in my head at the same time, but I keep visualizing a linked list of snapshots.

Member

I'm trying to understand the removal of ">=" from this and other comments, is it intended?

If so, it doesn't seem to match the code below, which still seems to return ">=":

else if (snapshot.timestampMillis() == timestampMillis)
   return snapshot;

Contributor Author

I replaced the previous Javadoc with the original one that I wrote. I can be more clear here if needed.

Member
@szehon-ho Dec 20, 2021

OK, got it that it's a new method rather than a change. To me the previous javadoc is clearer, as "older" gives the impression of strictly greater than.

* @throws IllegalStateException if the first ancestor after the given time can't be determined
*/
public static Snapshot firstSnapshotAfterTimestamp(Table table, Long targetTimestampMillis) {
Snapshot currentSnapshot = table.currentSnapshot();
// Return null if no snapshot exists or target timestamp is more recent than the current snapshot
if (currentSnapshot == null || currentSnapshot.timestampMillis() < targetTimestampMillis) {
public static Snapshot oldestAncestorAfter(Table table, long timestampMillis) {
if (table.currentSnapshot() == null) {
// there are no snapshots or ancestors
return null;
}

// Return the oldest snapshot which satisfies >= targetTimestamp
Snapshot lastSnapshot = null;
for (Snapshot snapshot : currentAncestors(table)) {
if (snapshot.timestampMillis() < targetTimestampMillis) {
if (snapshot.timestampMillis() < timestampMillis) {
return lastSnapshot;
} else if (snapshot.timestampMillis() == timestampMillis) {
return snapshot;
Member
@szehon-ho Dec 20, 2021

Maybe I'm missing something, but I'm curious: what case does this new condition fix?

Even in the old code, seems like it would return a snapshot which satisfies timestamp >= snapshot.timestamp?

Contributor Author

This isn't changing the method as much as completely replacing the old code with a different implementation that I suggested originally. I didn't want to go through and figure out what had changed and why, I just wanted to make it work.

The clause here catches the case where the current snapshot is the one to return because its timestamp matches the requested timestamp. In that case, there's no need to have an earlier parent so we short-circuit early.

Member

OK, thanks for context that it's a new method rather than changing the existing one.

The previous code is slightly easier (one less case for the user to understand, and in my understanding it still works but just takes one more cycle), but not a big deal as the new case is straightforward.

If it was me, I'd prefer the clarity of Stream methods, but I guess we do mostly manual traversals in Iceberg due to performance.

currentAncestors(table).stream().filter(s -> s.timestampMillis() <= timestampMillis).findFirst()

Contributor Author

In this case, we're also more carefully traversing the ancestors. We aren't finding the snapshot before timestampMillis, we're finding the child of the first snapshot before timestampMillis. And if the parent doesn't exist, it throws an exception.

}

lastSnapshot = snapshot;
}

// Return the oldest snapshot if the target timestamp is less than the oldest snapshot of the table
return lastSnapshot;
if (lastSnapshot != null && lastSnapshot.parentId() == null) {
// this is the first snapshot in the table, return it
Member

I am a little worried about having a function which works for a given input but only until the starting snapshot is expired. For example

oldestAncestorAfter(table,  Long.MinValue) // Returns first snapshot
expireSnapshots() // Expire first snapshot
oldestAncestorAfter(table,  Long.MinValue) // Throws exception

I think if we want to standardize, this should probably also throw an exception.

Contributor Author

I see your point here, but the result is based on the table state that gets passed in. If the table state is missing information, then we can't make it consistent.

Here's another way to think about it:

t1 = commitSnapshotOne()
t2 = commitSnapshotTwo()
oldestAncestorAfter(table, Long.MinValue) // returns snapshot one
expireSnapshots(t2 - 1)
oldestAncestorAfter(table, Long.MinValue) // returns snapshot two

I think that the behavior above is worse than throwing an exception based on the table state because it is silently inconsistent. At least throwing an exception tells you why it isn't returning the expected value.

Member

I agree that we should change it; I just think we can always throw the exception, so

oldestAncestorAfter(table,  Long.MinValue) // Throw exception
expireSnapshots() // Expire first snapshot
oldestAncestorAfter(table,  Long.MinValue) // Throw exception

return lastSnapshot;
}

throw new IllegalStateException(
"Cannot find snapshot older than " + DateTimeUtil.formatTimestampMillis(timestampMillis));
}
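The semantics being debated above can be checked against the example timestamps from the removed Javadoc ([S1 (10), S2 (11), S3 (12), S4 (14)]). The following is a self-contained sketch of the new traversal — `Snapshot` here is a toy stand-in, not the Iceberg class, and ancestors are assumed to be ordered newest-first, mirroring `currentAncestors(table)`:

```java
import java.util.Arrays;
import java.util.List;

// Toy model of SnapshotUtil.oldestAncestorAfter, not the Iceberg API.
public class OldestAncestorAfterSketch {
  static class Snapshot {
    final long id;
    final long timestampMillis;
    final Long parentId; // null means this is the first snapshot in the table

    Snapshot(long id, long timestampMillis, Long parentId) {
      this.id = id;
      this.timestampMillis = timestampMillis;
      this.parentId = parentId;
    }
  }

  // ancestors are ordered newest-first
  static Snapshot oldestAncestorAfter(List<Snapshot> ancestors, long timestampMillis) {
    Snapshot lastSnapshot = null;
    for (Snapshot snapshot : ancestors) {
      if (snapshot.timestampMillis < timestampMillis) {
        // the previous (younger) snapshot is the oldest one committed at or after
        // the timestamp; null when even the current snapshot predates it
        return lastSnapshot;
      } else if (snapshot.timestampMillis == timestampMillis) {
        return snapshot; // exact match short-circuits
      }
      lastSnapshot = snapshot;
    }

    if (lastSnapshot != null && lastSnapshot.parentId == null) {
      // reached the table's first snapshot; every snapshot is after the timestamp
      return lastSnapshot;
    }

    // the oldest known ancestor has an expired parent, so the answer is unknowable
    throw new IllegalStateException("Cannot find snapshot older than " + timestampMillis);
  }

  public static void main(String[] args) {
    // S1 (10), S2 (11), S3 (12), S4 (14), newest first
    List<Snapshot> ancestors = Arrays.asList(
        new Snapshot(4, 14, 3L),
        new Snapshot(3, 12, 2L),
        new Snapshot(2, 11, 1L),
        new Snapshot(1, 10, null));

    System.out.println(oldestAncestorAfter(ancestors, 5).id);   // S1
    System.out.println(oldestAncestorAfter(ancestors, 13).id);  // S4
    System.out.println(oldestAncestorAfter(ancestors, 15));     // null
  }
}
```

With only [S4, S3] remaining (S2 and S1 expired), a target of 5 throws `IllegalStateException`, which is the expiry scenario discussed in the thread above.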

@@ -99,12 +99,12 @@ public class SparkMicroBatchStream implements MicroBatchStream {
@Override
public Offset latestOffset() {
table.refresh();
if (isStreamEmpty(table)) {
if (table.currentSnapshot() == null) {
return StreamingOffset.START_OFFSET;
}

if (isFutureStartTime(table, fromTimestamp)) {
return initialFutureStartOffset(table);
if (table.currentSnapshot().timestampMillis() < fromTimestamp) {
return StreamingOffset.START_OFFSET;
}

Snapshot latestSnapshot = table.currentSnapshot();
@@ -169,8 +169,7 @@ public void stop() {
private List<FileScanTask> planFiles(StreamingOffset startOffset, StreamingOffset endOffset) {
List<FileScanTask> fileScanTasks = Lists.newArrayList();
StreamingOffset batchStartOffset = StreamingOffset.START_OFFSET.equals(startOffset) ?
new StreamingOffset(SnapshotUtil.firstSnapshotAfterTimestamp(table, fromTimestamp).snapshotId(), 0, false) :
startOffset;
determineStartingOffset(table, fromTimestamp) : startOffset;
Member
@RussellSpitzer Dec 20, 2021

I wonder if it would be simpler if we just drop the ternary operator here and have "suppliedOffset" be a parameter, like

private Offset determineStartingOffset(table, fromTimestamp, startOffset) {
  if (startOffset != null && !StreamingOffset.START_OFFSET.equals(startOffset)) {
    return startOffset;
  }
  ... logic
}

private Offset determineStartingOffset(table, fromTimestamp) {
  return determineStartingOffset(table, fromTimestamp, null);
}
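For reference, the overload shape sketched in the comment above compiles cleanly once types are filled in. `StreamingOffset` below is a toy stand-in (a sentinel compared by reference equality), not the Spark/Iceberg class, and the fallback branch stands in for the timestamp-based logic:

```java
// Toy stand-ins to show the suggested overload shape only, not real Spark/Iceberg types.
public class OverloadSketch {
  static class StreamingOffset {
    static final StreamingOffset START_OFFSET = new StreamingOffset(-1); // sentinel
    final long snapshotId;

    StreamingOffset(long snapshotId) {
      this.snapshotId = snapshotId;
    }
  }

  // oldestSnapshotId is a hypothetical placeholder for the timestamp-based logic
  static StreamingOffset determineStartingOffset(long oldestSnapshotId, StreamingOffset startOffset) {
    if (startOffset != null && !StreamingOffset.START_OFFSET.equals(startOffset)) {
      return startOffset; // a concrete offset was supplied, use it as-is
    }
    // ... the timestamp-based logic would go here ...
    return new StreamingOffset(oldestSnapshotId);
  }

  static StreamingOffset determineStartingOffset(long oldestSnapshotId) {
    return determineStartingOffset(oldestSnapshotId, null);
  }

  public static void main(String[] args) {
    System.out.println(determineStartingOffset(7).snapshotId);                               // 7
    System.out.println(determineStartingOffset(7, new StreamingOffset(3)).snapshotId);       // 3
    System.out.println(determineStartingOffset(7, StreamingOffset.START_OFFSET).snapshotId); // 7
  }
}
```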

Contributor Author

I think that the problem is that we have 3 "start" offsets here:

  1. StreamingOffset.START_OFFSET is a fake offset for when we don't have enough information, like when the table has no snapshots or the timestamp is in the future
  2. The "starting" offset referred to by determineStartingOffset is the concrete replacement for START_OFFSET, if there is enough information to start; otherwise it returns START_OFFSET as a placeholder again
  3. startOffset is the known starting point for this batch, from the last batch's end offset or initialization

I don't think that it makes sense to mix startOffset into the method because they're different things and determineStartingOffset is already fairly complicated. We could have a separate method, determineBatchStartOffset instead, but that would basically be this expression.

A better fix for this confusion is probably to rename the method. Maybe determineInitialOffset is more clear?


StreamingOffset currentOffset = null;

@@ -208,26 +207,31 @@ private boolean shouldProcess(Snapshot snapshot) {
return op.equals(DataOperations.APPEND);
}

private static boolean isStreamEmpty(Table table) {
return table.currentSnapshot() == null;
}

private static boolean isStreamNotEmpty(Table table) {
return table.currentSnapshot() != null;
}
private static StreamingOffset determineStartingOffset(Table table, Long fromTimestamp) {
if (table.currentSnapshot() == null) {
return StreamingOffset.START_OFFSET;
}

private static boolean isFutureStartTime(Table table, Long streamStartTimeStampMillis) {
if (streamStartTimeStampMillis == null) {
return false;
if (fromTimestamp == null) {
// match existing behavior and start from the oldest snapshot
return new StreamingOffset(SnapshotUtil.oldestAncestor(table).snapshotId(), 0, false);
}

return table.currentSnapshot().timestampMillis() < streamStartTimeStampMillis;
}
if (table.currentSnapshot().timestampMillis() < fromTimestamp) {
return StreamingOffset.START_OFFSET;
}

private static StreamingOffset initialFutureStartOffset(Table table) {
Preconditions.checkNotNull(table, "Cannot process future start offset with invalid table input.");
Snapshot latestSnapshot = table.currentSnapshot();
return new StreamingOffset(latestSnapshot.snapshotId(), Iterables.size(latestSnapshot.addedFiles()) + 1, false);
try {
Snapshot snapshot = SnapshotUtil.oldestAncestorAfter(table, fromTimestamp);
if (snapshot != null) {
return new StreamingOffset(snapshot.snapshotId(), 0, false);
} else {
return StreamingOffset.START_OFFSET;
}
} catch (IllegalStateException e) {
// could not determine the first snapshot after the timestamp. use the oldest ancestor instead
return new StreamingOffset(SnapshotUtil.oldestAncestor(table).snapshotId(), 0, false);
}
}
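Taken together, the new `determineStartingOffset` has four outcomes: no snapshots, no `fromTimestamp` (legacy behavior), a future timestamp, and a concrete ancestor (with a fallback when an ancestor was expired). A self-contained model of that decision — toy `Snapshot` type, with a `null` return playing the role of `StreamingOffset.START_OFFSET`, and ancestors ordered newest-first:

```java
import java.util.List;

// Toy model of the determineStartingOffset decision above, not the real classes.
public class StartingOffsetSketch {
  static class Snapshot {
    final long id;
    final long timestampMillis;
    final Long parentId; // null means first snapshot in the table

    Snapshot(long id, long timestampMillis, Long parentId) {
      this.id = id;
      this.timestampMillis = timestampMillis;
      this.parentId = parentId;
    }
  }

  // returns the snapshot id to start streaming from, or null for START_OFFSET
  static Long determineStartingOffset(List<Snapshot> ancestors, Long fromTimestamp) {
    if (ancestors.isEmpty()) {
      return null; // no snapshots yet: START_OFFSET
    }

    if (fromTimestamp == null) {
      // match existing behavior and start from the oldest snapshot
      return ancestors.get(ancestors.size() - 1).id;
    }

    if (ancestors.get(0).timestampMillis < fromTimestamp) {
      return null; // fromTimestamp is in the future: START_OFFSET again
    }

    Snapshot lastSnapshot = null;
    for (Snapshot snapshot : ancestors) {
      if (snapshot.timestampMillis < fromTimestamp) {
        return lastSnapshot.id; // oldest ancestor committed at or after fromTimestamp
      } else if (snapshot.timestampMillis == fromTimestamp) {
        return snapshot.id;
      }
      lastSnapshot = snapshot;
    }

    // fromTimestamp predates every known ancestor; in the real code this is either
    // the table's first snapshot or the IllegalStateException fallback, and in this
    // model both resolve to the oldest known ancestor
    return lastSnapshot.id;
  }
}
```

Note that the `currentSnapshot().timestampMillis() < fromTimestamp` check is redundant with the traversal (the loop would return null anyway), but it makes the future-timestamp case explicit.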

private static class InitialOffsetStore {
@@ -250,11 +254,7 @@ public StreamingOffset initialOffset() {
}

table.refresh();
StreamingOffset offset = StreamingOffset.START_OFFSET;
if (isStreamNotEmpty(table)) {
offset = isFutureStartTime(table, fromTimestamp) ? initialFutureStartOffset(table) :
new StreamingOffset(SnapshotUtil.firstSnapshotAfterTimestamp(table, fromTimestamp).snapshotId(), 0, false);
}
StreamingOffset offset = determineStartingOffset(table, fromTimestamp);

OutputFile outputFile = io.newOutputFile(initialOffsetLocation);
writeOffset(offset, outputFile);