Support Spark Structured Streaming Read for Iceberg #796
Changes from all commits
@@ -20,6 +20,7 @@
 package org.apache.iceberg.util;

 import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.function.Function;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.Snapshot;

@@ -69,8 +70,20 @@ public static List<Long> currentAncestors(Table table) {
  * This method assumes that fromSnapshotId is an ancestor of toSnapshotId.
  */
 public static List<Long> snapshotIdsBetween(Table table, long fromSnapshotId, long toSnapshotId) {
+  AtomicBoolean isAncestor = new AtomicBoolean(false);
Member
I think this may be clearer, and not require the Atomic, if we do a little refactoring and do two recursive walks, then do the ancestorIds lookup. Or, if we want to keep the current logic, I believe we can just throw an IllegalStateException in the lambda instead of returning null or using the atomic boolean.

Contributor (Author)
I think doing two rounds of traversal is quite time consuming if we have many snapshots in a table, so I prefer not to change to that approach.

Member
You can't just have if (snapshotId == fromSnapshotId) throw new IllegalStateException?

Contributor
That would be odd, as the condition is actually the "expected" one. Even if we throw a custom exception to terminate the traversal, would the snapshots collected in the earlier calls still be available after handling the exception? By the way, this logic is great for pruning the case (say, as an optimization), though I agree the code is less friendly to understand intuitively. Leaving a small code comment would probably make it clearer.
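For reference, a minimal sketch of the two-walk alternative suggested above, assuming it sits in the same utility class as ancestorIds; the explicit parent-pointer loop and the method name are illustrative, not code from this PR:

```java
import java.util.List;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;

// Sketch only: verify ancestry with one walk, then collect the IDs with a second walk.
public static List<Long> snapshotIdsBetweenTwoPass(Table table, long fromSnapshotId, long toSnapshotId) {
  boolean isAncestor = false;
  for (Snapshot current = table.snapshot(toSnapshotId);
       current != null;
       current = current.parentId() != null ? table.snapshot(current.parentId()) : null) {
    if (current.snapshotId() == fromSnapshotId) {
      isAncestor = true;
      break;
    }
  }

  if (!isAncestor) {
    throw new IllegalStateException(fromSnapshotId + " is not an ancestor of " + toSnapshotId);
  }

  // Second walk: the same ancestorIds traversal as the existing code, stopping at fromSnapshotId.
  return Lists.newArrayList(ancestorIds(table.snapshot(toSnapshotId),
      snapshotId -> snapshotId != fromSnapshotId ? table.snapshot(snapshotId) : null));
}
```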
   List<Long> snapshotIds = Lists.newArrayList(ancestorIds(table.snapshot(toSnapshotId),
-      snapshotId -> snapshotId != fromSnapshotId ? table.snapshot(snapshotId) : null));
+      snapshotId -> {
+        if (snapshotId == fromSnapshotId) {
+          isAncestor.set(true);
+          return null;
+        } else {
+          return table.snapshot(snapshotId);
+        }
+      }));
+  if (!isAncestor.get()) {
+    throw new IllegalStateException(fromSnapshotId + " is not an ancestor of " + toSnapshotId);
+  }

   return snapshotIds;
 }
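To make the behavioral change concrete, here is a hedged sketch of a call site, assuming the caller lives in the same utility class; the snapshot IDs are placeholders:

```java
// Sketch: with the AtomicBoolean guard, a fromSnapshotId that is never reached while
// walking back from toSnapshotId now fails fast instead of silently returning whatever
// ancestor IDs happened to be collected.
try {
  List<Long> ids = snapshotIdsBetween(table, 1234L, 5678L);  // placeholder snapshot IDs
} catch (IllegalStateException e) {
  // message: "1234 is not an ancestor of 5678"
}
```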
@@ -0,0 +1,150 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.spark.source;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import java.io.IOException;
import java.io.StringWriter;
import java.io.UncheckedIOException;
import org.apache.iceberg.relocated.com.google.common.base.Objects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.util.JsonUtil;
import org.apache.spark.sql.sources.v2.reader.streaming.Offset;
/**
 * An implementation of Spark Structured Streaming Offset, used to track the files of an Iceberg
 * table that have been processed so far. This StreamingOffset consists of:
 *
 * version: the version of StreamingOffset. The offset is written with a version number that is
 * validated when deserializing from a JSON string.
 * snapshot_id: the ID of the snapshot currently being processed.
 * index: the index of the last scanned file in the snapshot.
 * scan_all_files: whether to scan all files in a snapshot; currently all files are scanned only
 * in the starting snapshot.
 * snapshot_fully_processed: whether the current snapshot is fully processed, to avoid revisiting
 * an already processed snapshot.
 */
class StreamingOffset extends Offset {
  static final StreamingOffset START_OFFSET = new StreamingOffset(-1L, -1, false, true);

  private static final int CURR_VERSION = 1;
  private static final String VERSION = "version";
  private static final String SNAPSHOT_ID = "snapshot_id";
  private static final String INDEX = "index";
Member
I feel like "index" needs to be a little clearer. I believe this is more like the index of the last scanned file in the snapshot, or something like that?

Contributor (Author)
Yes, this is the index of the last scanned file in the snapshot.
  private static final String SCAN_ALL_FILES = "scan_all_files";
  private static final String SNAPSHOT_FULLY_PROCESSED = "snapshot_fully_processed";

  private final long snapshotId;
  private final int index;
  private final boolean scanAllFiles;
  private final boolean snapshotFullyProcessed;

  StreamingOffset(long snapshotId, int index, boolean scanAllFiles, boolean snapshotFullyProcessed) {
    this.snapshotId = snapshotId;
    this.index = index;
    this.scanAllFiles = scanAllFiles;
    this.snapshotFullyProcessed = snapshotFullyProcessed;
  }
  static StreamingOffset fromJson(String json) {
    Preconditions.checkNotNull(json, "The input JSON string is null");

    try {
      JsonNode node = JsonUtil.mapper().readValue(json, JsonNode.class);
      int version = JsonUtil.getInt(VERSION, node);
      if (version > CURR_VERSION) {
        throw new IOException(String.format("Cannot deserialize a JSON offset from version %d. %d is not compatible " +
            "with the version of Iceberg %d and cannot be used. Please use a compatible version of Iceberg " +
            "to read this offset", version, version, CURR_VERSION));
      }

      long snapshotId = JsonUtil.getLong(SNAPSHOT_ID, node);
      int index = JsonUtil.getInt(INDEX, node);
      boolean shouldScanAllFiles = JsonUtil.getBool(SCAN_ALL_FILES, node);
      boolean snapshotFullyProcessed = JsonUtil.getBool(SNAPSHOT_FULLY_PROCESSED, node);

      return new StreamingOffset(snapshotId, index, shouldScanAllFiles, snapshotFullyProcessed);
    } catch (IOException e) {
      throw new IllegalStateException(String.format("Failed to parse StreamingOffset from JSON string %s", json), e);
    }
  }

  @Override
  public String json() {
    StringWriter writer = new StringWriter();
    try {
      JsonGenerator generator = JsonUtil.factory().createGenerator(writer);
      generator.writeStartObject();
      generator.writeNumberField(VERSION, CURR_VERSION);
      generator.writeNumberField(SNAPSHOT_ID, snapshotId);
      generator.writeNumberField(INDEX, index);
      generator.writeBooleanField(SCAN_ALL_FILES, scanAllFiles);
      generator.writeBooleanField(SNAPSHOT_FULLY_PROCESSED, snapshotFullyProcessed);
      generator.writeEndObject();
      generator.flush();
    } catch (IOException e) {
      throw new UncheckedIOException("Failed to write StreamingOffset to json", e);
    }

    return writer.toString();
  }

  long snapshotId() {
    return snapshotId;
  }

  int index() {
    return index;
  }

  boolean shouldScanAllFiles() {
    return scanAllFiles;
  }

  boolean isSnapshotFullyProcessed() {
    return snapshotFullyProcessed;
  }

  @Override
  public boolean equals(Object obj) {
    if (obj instanceof StreamingOffset) {
      StreamingOffset offset = (StreamingOffset) obj;
      return offset.snapshotId == snapshotId &&
          offset.index == index &&
          offset.scanAllFiles == scanAllFiles &&
          offset.snapshotFullyProcessed == snapshotFullyProcessed;
    } else {
      return false;
    }
  }

  @Override
  public int hashCode() {
    return Objects.hashCode(snapshotId, index, scanAllFiles, snapshotFullyProcessed);
  }

  @Override
  public String toString() {
    return String.format("Streaming Offset[%d: index (%d) scan_all_files (%b) snapshot_fully_processed (%b)]",
        snapshotId, index, scanAllFiles, snapshotFullyProcessed);
  }
}
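As a quick illustration of the serialized form, a round trip through json() and fromJson() would look roughly like this; the snapshot ID and index are made-up values, and since the class is package-private the sketch assumes it runs in the same package:

```java
// Hypothetical round trip; values are placeholders, not from the PR.
StreamingOffset offset = new StreamingOffset(3821550127947089009L, 3, false, true);

String json = offset.json();
// json is of the form:
// {"version":1,"snapshot_id":3821550127947089009,"index":3,
//  "scan_all_files":false,"snapshot_fully_processed":true}

StreamingOffset restored = StreamingOffset.fromJson(json);
assert restored.equals(offset);   // all four fields participate in equals()
```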
@@ -42,16 +42,19 @@
 import org.apache.spark.sql.sources.DataSourceRegister;
 import org.apache.spark.sql.sources.v2.DataSourceOptions;
 import org.apache.spark.sql.sources.v2.DataSourceV2;
+import org.apache.spark.sql.sources.v2.MicroBatchReadSupport;
 import org.apache.spark.sql.sources.v2.ReadSupport;
 import org.apache.spark.sql.sources.v2.StreamWriteSupport;
 import org.apache.spark.sql.sources.v2.WriteSupport;
 import org.apache.spark.sql.sources.v2.reader.DataSourceReader;
+import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReader;
 import org.apache.spark.sql.sources.v2.writer.DataSourceWriter;
 import org.apache.spark.sql.sources.v2.writer.streaming.StreamWriter;
 import org.apache.spark.sql.streaming.OutputMode;
 import org.apache.spark.sql.types.StructType;

-public class IcebergSource implements DataSourceV2, ReadSupport, WriteSupport, DataSourceRegister, StreamWriteSupport {
+public class IcebergSource implements DataSourceV2, ReadSupport, WriteSupport, DataSourceRegister,
+    StreamWriteSupport, MicroBatchReadSupport {

  private SparkSession lazySpark = null;
  private JavaSparkContext lazySparkContext = null;
@@ -129,6 +132,23 @@ public StreamWriter createStreamWriter(String runId, StructType dsStruct,
    return new StreamingWriter(table, io, encryptionManager, options, queryId, mode, appId, writeSchema, dsStruct);
  }

+  @Override
+  public MicroBatchReader createMicroBatchReader(Optional<StructType> schema, String checkpointLocation,
+                                                 DataSourceOptions options) {
+    if (schema.isPresent()) {
+      throw new IllegalStateException("Iceberg does not support specifying the schema at read time");
+    }
+
+    Configuration conf = new Configuration(lazyBaseConf());
Contributor
There look to be similar patterns across this file, but the PR is already quite large, so that is better done in another PR.
+    Table table = getTableAndResolveHadoopConfiguration(options, conf);
+    String caseSensitive = lazySparkSession().conf().get("spark.sql.caseSensitive");
+
+    Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
+    Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());
+
+    return new StreamingReader(table, io, encryptionManager, Boolean.parseBoolean(caseSensitive), options);
+  }

  protected Table findTable(DataSourceOptions options, Configuration conf) {
    Optional<String> path = options.get("path");
    Preconditions.checkArgument(path.isPresent(), "Cannot open table: path is not set");
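With MicroBatchReadSupport wired in, a streaming read of an Iceberg table can be started from Spark roughly as below. This is a hedged usage sketch: the table path and checkpoint location are placeholders, not values from the PR.

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.streaming.StreamingQuery;
import org.apache.spark.sql.streaming.StreamingQueryException;

public class IcebergStreamingReadDemo {
  public static void main(String[] args) throws StreamingQueryException {
    SparkSession spark = SparkSession.builder().appName("iceberg-streaming-demo").getOrCreate();

    // "iceberg" is the short name registered via DataSourceRegister; the load path
    // backs the "path" option consumed by findTable().
    Dataset<Row> stream = spark.readStream()
        .format("iceberg")
        .load("hdfs://nn:8020/warehouse/db/events");   // placeholder table location

    // Spark checkpoints micro-batch progress as StreamingOffset JSON under checkpointLocation.
    StreamingQuery query = stream.writeStream()
        .format("console")
        .option("checkpointLocation", "/tmp/checkpoints/iceberg-streaming-demo")
        .start();

    query.awaitTermination();
  }
}
```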
@@ -191,6 +191,26 @@ private StructType lazyType() {
    return type;
  }

+  protected Long splitSize() {
+    return splitSize;
+  }
+
+  protected Integer splitLookback() {
+    return splitLookback;
+  }
+
+  protected Long splitOpenFileCost() {
+    return splitOpenFileCost;
+  }
+
+  protected boolean caseSensitive() {
+    return caseSensitive;
+  }
+
+  protected List<Expression> filterExpressions() {
+    return filterExpressions;
+  }
+
  @Override
  public StructType readSchema() {
    return lazyType();
@@ -311,28 +331,7 @@ public Statistics estimateStatistics() {
  @Override
  public boolean enableBatchRead() {
    if (readUsingBatch == null) {
-      boolean allParquetFileScanTasks =
-          tasks().stream()
-              .allMatch(combinedScanTask -> !combinedScanTask.isDataTask() && combinedScanTask.files()
-                  .stream()
-                  .allMatch(fileScanTask -> fileScanTask.file().format().equals(
-                      FileFormat.PARQUET)));
-
-      boolean allOrcFileScanTasks =
-          tasks().stream()
-              .allMatch(combinedScanTask -> !combinedScanTask.isDataTask() && combinedScanTask.files()
-                  .stream()
-                  .allMatch(fileScanTask -> fileScanTask.file().format().equals(
-                      FileFormat.ORC)));
-
-      boolean atLeastOneColumn = lazySchema().columns().size() > 0;
-
-      boolean onlyPrimitives = lazySchema().columns().stream().allMatch(c -> c.type().isPrimitiveType());
-
-      boolean hasNoDeleteFiles = tasks().stream().noneMatch(TableScanUtil::hasDeletes);
-
-      this.readUsingBatch = batchReadsEnabled && hasNoDeleteFiles && (allOrcFileScanTasks ||
-          (allParquetFileScanTasks && atLeastOneColumn && onlyPrimitives));
+      this.readUsingBatch = checkEnableBatchRead(tasks());
Contributor
It is probably better to add a reviewer-friendly comment stating whether the extracted method changed or not, and if it did change, where.
    }
    return readUsingBatch;
  }
@@ -344,7 +343,7 @@ private static void mergeIcebergHadoopConfs(
        .forEach(key -> baseConf.set(key.replaceFirst("hadoop.", ""), options.get(key)));
  }

-  private List<CombinedScanTask> tasks() {
+  protected List<CombinedScanTask> tasks() {
    if (tasks == null) {
      TableScan scan = table
          .newScan()

@@ -395,6 +394,33 @@ private List<CombinedScanTask> tasks() {
    return tasks;
  }
+  // An extracted method that will be overridden by StreamingReader, because the tasks generated
+  // by streaming are per micro-batch and cannot be planned ahead of time the way Reader plans them.
+  protected boolean checkEnableBatchRead(List<CombinedScanTask> taskList) {
+    boolean allParquetFileScanTasks =
+        taskList.stream()
+            .allMatch(combinedScanTask -> !combinedScanTask.isDataTask() && combinedScanTask.files()
+                .stream()
+                .allMatch(fileScanTask -> fileScanTask.file().format().equals(
+                    FileFormat.PARQUET)));
+
+    boolean allOrcFileScanTasks =
+        taskList.stream()
+            .allMatch(combinedScanTask -> !combinedScanTask.isDataTask() && combinedScanTask.files()
+                .stream()
+                .allMatch(fileScanTask -> fileScanTask.file().format().equals(
+                    FileFormat.ORC)));
+
+    boolean atLeastOneColumn = lazySchema().columns().size() > 0;
+
+    boolean onlyPrimitives = lazySchema().columns().stream().allMatch(c -> c.type().isPrimitiveType());
+
+    boolean hasNoDeleteFiles = taskList.stream().noneMatch(TableScanUtil::hasDeletes);
+
+    return batchReadsEnabled && hasNoDeleteFiles && (allOrcFileScanTasks ||
+        (allParquetFileScanTasks && atLeastOneColumn && onlyPrimitives));
+  }
  @Override
  public String toString() {
    return String.format(
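A hedged sketch of how the streaming subclass might use the extracted hook; the method and field names below (other than checkEnableBatchRead) are hypothetical, not code from this PR:

```java
// Sketch only: inside a hypothetical StreamingReader subclass of Reader. A streaming read
// plans its CombinedScanTasks per micro-batch, so batch-read eligibility is evaluated
// against that per-batch list by reusing the extracted check.
private boolean useVectorizedRead(StreamingOffset start, StreamingOffset end) {
  List<CombinedScanTask> batchTasks = planTasksBetween(start, end);  // hypothetical helper
  return checkEnableBatchRead(batchTasks);
}
```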
Because you're using try-with-resources for the Iterable, you don't really need to worry about the Iterator that was created from it. The Iterable will close all open Iterators that were created in the block. It's fine either way, though.