81 changes: 81 additions & 0 deletions flink-runtime/build.gradle
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

project(':iceberg-flink-runtime') {
apply plugin: 'com.github.johnrengelman.shadow'

tasks.jar.dependsOn tasks.shadowJar

configurations {
implementation {
exclude group: 'org.apache.flink'
// included in Flink
exclude group: 'org.slf4j'
exclude group: 'org.apache.commons'
exclude group: 'commons-pool'
exclude group: 'commons-codec'
exclude group: 'org.xerial.snappy'
exclude group: 'javax.xml.bind'
exclude group: 'javax.annotation'
}
}

dependencies {
implementation project(':iceberg-flink')
implementation project(':iceberg-aws')
implementation(project(':iceberg-nessie')) {
exclude group: 'com.google.code.findbugs', module: 'jsr305'
}

// flink-connector-base is not part of Flink runtime. Hence,
// iceberg-flink-runtime should include it as a transitive dependency.
implementation "org.apache.flink:flink-connector-base"
@rdblue (Contributor), Oct 29, 2021:
For other dependencies that we expect to be present at runtime, we use compileOnly so the dependency doesn't leak into the runtime Jar. Is that something we should do here as well? This looks like it would add the Flink Jar into runtimeClasspath, which would get included in the Jar.

@stevenzwu (Contributor, Author), Oct 29, 2021:
This is the flink-runtime module, so we use an implementation dependency to pull flink-connector-base into the iceberg-flink-runtime jar. In the flink module below, it is compileOnly.

A reviewer commented:
Please add a comment.

@rdblue flink-connector-base needs to be a transitive dependency of the iceberg connector (or shaded/relocated). It is not part of the Flink runtime.

@stevenzwu (Contributor, Author) replied:
Added a comment.

A Member commented:
@stevenzwu After PR #3364 was merged, we no longer need a common iceberg-flink-runtime for all Flink versions. Instead we have a separate iceberg-flink:iceberg-flink-<MAJOR.MINOR>-runtime module for each <MAJOR.MINOR> Flink release, so that we can build features on top of the latest Flink API.

You may want to add the transitive dependency org.apache.flink:flink-connector-base in this line for Flink 1.12, and this line for Flink 1.13.

}

shadowJar {
configurations = [project.configurations.runtimeClasspath]

zip64 true

// include the LICENSE and NOTICE files for the shaded Jar
from(projectDir) {
include 'LICENSE'
include 'NOTICE'
}

// Relocate dependencies to avoid conflicts
relocate 'org.apache.avro', 'org.apache.iceberg.shaded.org.apache.avro'
relocate 'org.apache.parquet', 'org.apache.iceberg.shaded.org.apache.parquet'
relocate 'com.google', 'org.apache.iceberg.shaded.com.google'
relocate 'com.fasterxml', 'org.apache.iceberg.shaded.com.fasterxml'
relocate 'com.github.benmanes', 'org.apache.iceberg.shaded.com.github.benmanes'
relocate 'org.checkerframework', 'org.apache.iceberg.shaded.org.checkerframework'
relocate 'shaded.parquet', 'org.apache.iceberg.shaded.org.apache.parquet.shaded'
relocate 'org.apache.orc', 'org.apache.iceberg.shaded.org.apache.orc'
relocate 'io.airlift', 'org.apache.iceberg.shaded.io.airlift'
relocate 'org.threeten.extra', 'org.apache.iceberg.shaded.org.threeten.extra'

classifier null
}

jar {
enabled = false
}
}

2 changes: 2 additions & 0 deletions flink/build.gradle
@@ -28,6 +28,7 @@ project(':iceberg-flink') {
implementation project(':iceberg-parquet')
implementation project(':iceberg-hive-metastore')

compileOnly "org.apache.flink:flink-connector-base"
A Member commented:
It's strange that the builds for Flink 1.12 & 1.13 passed, because I don't see the same dependency added to the Flink 1.12 build.gradle and the 1.13 build.gradle. Maybe I need to check 1.12's build.gradle again.

@stevenzwu (Contributor, Author), Nov 1, 2021:
@openinx Maybe we can follow up on the other comment discussion here.

With the SplitEnumerator API change, it looks like I need to put the FLIP-27 source in the v1.13 folder. What should we do for future versions (like 1.14)? Do we copy the FLIP-27 source code from the v1.13 folder to the v1.14 folder?

A Contributor commented:
@openinx, the tests run against the iceberg-flink module. They aren't present in the 1.12 or 1.13 modules. If you want them to be run for those modules, you'd need to add the source folder like you do for src/main/java. If you choose to do that, let's also remove CI for the common module since we don't need to run the tests outside of 1.12 and 1.13 if they are run in those modules.

A Contributor commented:
@stevenzwu, I think that copying the parts that change is reasonable. And once we remove support for 1.12, you can move the files back into the common module.

@stevenzwu (Contributor, Author) replied:
Yeah, that is my plan too. Once 1.12 support is removed, we should be able to move the files back to the common module. We just need to be diligent with these efforts.

compileOnly "org.apache.flink:flink-streaming-java_2.12"
compileOnly "org.apache.flink:flink-streaming-java_2.12::tests"
compileOnly "org.apache.flink:flink-table-api-java-bridge_2.12"
@@ -56,6 +57,7 @@ project(':iceberg-flink') {
exclude group: 'org.apache.hive', module: 'hive-storage-api'
}

testImplementation "org.apache.flink:flink-connector-test-utils"
testImplementation "org.apache.flink:flink-core"
testImplementation "org.apache.flink:flink-runtime_2.12"
testImplementation "org.apache.flink:flink-table-planner-blink_2.12"
@@ -40,4 +40,10 @@ private FlinkConfigOptions() {
.intType()
.defaultValue(100)
.withDescription("Sets max infer parallelism for source operator.");

public static final ConfigOption<Integer> SOURCE_READER_FETCH_RECORD_BATCH_SIZE = ConfigOptions
.key("source.iceberg.reader.fetch-record-batch-size")
A Contributor commented:
Is there precedent for this config key? What other keys are similar? The others in this file start with table.exec.iceberg. Is there a reason for not continuing with that convention?

@stevenzwu (Contributor, Author) replied:
I didn't use table.exec because this config isn't about table/SQL execution behavior; it configures the Iceberg source itself (DataStream API or SQL).

I checked the two FLIP-27 source implementations (Kafka and file) in the Flink repo:

  • The Kafka source options don't use any prefix, e.g. "partition.discovery.interval.ms".
  • The file source does use a prefix, e.g. "source.file.records.fetch-size".

This key follows the file source convention.

@openinx, any suggestions?

A Member commented:
Yes, this is unrelated to table/SQL execution. Both DataStream jobs and Table/SQL jobs use the same configuration keys, so I'm okay with keeping the current name.

(In fact, if we didn't have to consider Flink's configuration naming, I'd prefer to call it iceberg.source.reader.fetch-record-batch-size. But Iceberg is also a Flink connector, and all the other FLIP-27 source connectors name their options source.<connector>.xxx, so I think we can follow that convention.)

.intType()
.defaultValue(2048)
.withDescription("The target number of records for Iceberg reader fetch batch.");
}
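To make the new option concrete, here is a minimal, hypothetical usage sketch that is not part of this PR: it uses only the option defined in the diff above plus Flink's standard Configuration API, and it assumes FlinkConfigOptions lives in the org.apache.iceberg.flink package.

import org.apache.flink.configuration.Configuration;
import org.apache.iceberg.flink.FlinkConfigOptions; // assumed package for the class in this diff

public class FetchBatchSizeExample {
  public static void main(String[] args) {
    Configuration config = new Configuration();

    // Override the default of 2048 records per fetch batch.
    config.set(FlinkConfigOptions.SOURCE_READER_FETCH_RECORD_BATCH_SIZE, 4096);

    // A reader would consult this value when sizing the record batches it hands to Flink.
    int batchSize = config.get(FlinkConfigOptions.SOURCE_READER_FETCH_RECORD_BATCH_SIZE);
    System.out.println("Fetch record batch size: " + batchSize);
  }
}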
@@ -29,6 +29,7 @@
import org.apache.iceberg.encryption.InputFilesDecryptor;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;

/**
* Flink data iterator that reads {@link CombinedScanTask} into a {@link CloseableIterator}
@@ -41,18 +42,47 @@ public class DataIterator<T> implements CloseableIterator<T> {
private final FileScanTaskReader<T> fileScanTaskReader;

private final InputFilesDecryptor inputFilesDecryptor;
private Iterator<FileScanTask> tasks;
private final CombinedScanTask combinedTask;
private final Position position;

private Iterator<FileScanTask> fileTasksIterator;
private CloseableIterator<T> currentIterator;

public DataIterator(FileScanTaskReader<T> fileScanTaskReader, CombinedScanTask task,
FileIO io, EncryptionManager encryption) {
this.fileScanTaskReader = fileScanTaskReader;

this.inputFilesDecryptor = new InputFilesDecryptor(task, io, encryption);
this.tasks = task.files().iterator();
this.combinedTask = task;
// fileOffset starts at -1 because the iterator begins with an empty
// CloseableIterator that does not correspond to any file in the split.
this.position = new Position(-1, 0L);
A Member commented:
The general DataIterator doesn't use the position or seek method to skip tasks or records. Putting all the FLIP-27 related logic in the common Flink read path doesn't make sense to me, because every time I read this class I have to work out which parts are related to FLIP-27 and which are not.

I suggest introducing a separate SeekableDataIterator to isolate the two code paths. I made a simple commit for this: https://github.com/openinx/incubator-iceberg/commit/b08dde86aae0c718d9d72acb347dffb3a836b336; you may want to take a look.

@stevenzwu (Contributor, Author), Nov 1, 2021:
I wouldn't say that seek capability is FLIP-27 specific. If we think of DataIterator as reading a list of files/splits from a CombinedScanTask, it is like a file API where seek is pretty common. It is needed to achieve exactly-once processing semantics; e.g., if we were to implement exactly-once semantics for the current streaming source, I would imagine we would need this as well.

Thanks a lot for the SeekableDataIterator. I feel that leaving these two empty abstract methods in the base DataIterator is a little weird:

protected void advanceRecord()
protected void advanceTask()

Overall, I still think adding seek capability to DataIterator is natural (for file-like read APIs).


this.fileTasksIterator = task.files().iterator();
this.currentIterator = CloseableIterator.empty();
}

public void seek(Position startingPosition) {
// skip files
Preconditions.checkArgument(startingPosition.fileOffset() < combinedTask.files().size(),
"Checkpointed file offset is %s, while CombinedScanTask has %s files",
startingPosition.fileOffset(), combinedTask.files().size());
for (long i = 0L; i < startingPosition.fileOffset(); ++i) {
A Contributor commented:
Is fileOffset() a long? That seems odd to me. When would you need to address more than 2 billion files in a single combined scan task?

@stevenzwu (Contributor, Author), Nov 1, 2021:
An integer would certainly be sufficient. I was using long to match the type in RecordAndPosition from the flink-connector-files module. Looking at it again, the long offset in Flink's RecordAndPosition actually means the byte offset within a file. I will define our own RecordAndPosition and change fileOffset to an int.

@stevenzwu (Contributor, Author) replied:
Same as the other comment; will update.

fileTasksIterator.next();
}
updateCurrentIterator();
// skip records within the file
for (long i = 0; i < startingPosition.recordOffset(); ++i) {
if (hasNext()) {
next();
} else {
throw new IllegalStateException("Not enough records to skip: " +
startingPosition.recordOffset());
}
}
this.position.update(startingPosition.fileOffset(), startingPosition.recordOffset());
A Contributor commented:
Can position be final since this is using update?

@stevenzwu (Contributor, Author) replied:
Yes, I will make the position field final.

}

@Override
public boolean hasNext() {
updateCurrentIterator();
@@ -62,18 +92,24 @@ public boolean hasNext() {
@Override
public T next() {
updateCurrentIterator();
position.advanceRecord();
return currentIterator.next();
}

public boolean isCurrentIteratorDone() {
return !currentIterator.hasNext();
}

/**
* Updates the current iterator field to ensure that the current Iterator
* is not exhausted.
*/
private void updateCurrentIterator() {
try {
while (!currentIterator.hasNext() && tasks.hasNext()) {
while (!currentIterator.hasNext() && fileTasksIterator.hasNext()) {
currentIterator.close();
currentIterator = openTaskIterator(tasks.next());
currentIterator = openTaskIterator(fileTasksIterator.next());
position.advanceFile();
}
} catch (IOException e) {
throw new UncheckedIOException(e);
@@ -88,6 +124,10 @@ private CloseableIterator<T> openTaskIterator(FileScanTask scanTask) {
public void close() throws IOException {
// close the current iterator
currentIterator.close();
tasks = null;
fileTasksIterator = null;
}

public Position position() {

A reviewer commented:

It appears that you are using the CheckpointedPosition data structure to tell the iterator, from the outside, which position to seek to. However, to communicate the current position back to the outside world, you are using the internal Position data structure. Can we keep this consistent and use either CheckpointedPosition or the mutable Position for both?

@stevenzwu (Contributor, Author) replied:
That is a good point, and it is also related to your question above. Let me see how to unify them and maybe move away from Flink's CheckpointedPosition.

return position;
}
}
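Tying the seek/position discussion above together, here is a rough, hypothetical sketch of how a FLIP-27 split reader might restore and checkpoint a DataIterator. DataIterator, Position, and FileScanTaskReader come from this PR; the wrapper class itself, its constructor shape, and the exact package locations of the imports are assumptions for illustration only.

import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.encryption.EncryptionManager;
import org.apache.iceberg.flink.source.DataIterator;
import org.apache.iceberg.flink.source.FileScanTaskReader;
import org.apache.iceberg.flink.source.Position;
import org.apache.iceberg.io.FileIO;

// Hypothetical wrapper showing the restore and checkpoint paths around DataIterator.
public class SplitReaderSketch<T> {
  private final DataIterator<T> iterator;

  public SplitReaderSketch(FileScanTaskReader<T> taskReader, CombinedScanTask task,
                           FileIO io, EncryptionManager encryption, Position restoredPosition) {
    this.iterator = new DataIterator<>(taskReader, task, io, encryption);
    if (restoredPosition != null) {
      // Skip the files and records that were already emitted before the last successful checkpoint.
      iterator.seek(restoredPosition);
    }
  }

  public Position snapshotPosition() {
    // The iterator's mutable position advances on every next(); copy it into checkpoint state.
    Position current = iterator.position();
    return new Position(current.fileOffset(), current.recordOffset());
  }
}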
@@ -77,7 +77,7 @@ public FlinkInputSplit[] createInputSplits(int minNumSplits) throws IOException
tableLoader.open();
try (TableLoader loader = tableLoader) {
Table table = loader.loadTable();
return FlinkSplitGenerator.createInputSplits(table, context);
return FlinkSplitPlanner.planInputSplits(table, context);
}
}

@@ -22,33 +22,58 @@
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.List;
import org.apache.flink.annotation.Internal;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.flink.source.split.IcebergSourceSplit;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;

class FlinkSplitGenerator {
private FlinkSplitGenerator() {
@Internal
public class FlinkSplitPlanner {
private FlinkSplitPlanner() {
}

static FlinkInputSplit[] createInputSplits(Table table, ScanContext context) {
List<CombinedScanTask> tasks = tasks(table, context);
FlinkInputSplit[] splits = new FlinkInputSplit[tasks.size()];
for (int i = 0; i < tasks.size(); i++) {
splits[i] = new FlinkInputSplit(i, tasks.get(i));
static FlinkInputSplit[] planInputSplits(Table table, ScanContext context) {
A Contributor commented:
Why change the name of this method?

@stevenzwu (Contributor, Author) replied:
create/generate implies creating something new. This method actually plans/discovers splits from the table, hence the rename. I also renamed the class from FlinkSplitGenerator to FlinkSplitPlanner. It is an internal class, so the rename shouldn't break user code.

try (CloseableIterable<CombinedScanTask> tasksIterable = planTasks(table, context)) {
List<CombinedScanTask> tasks = Lists.newArrayList(tasksIterable);
FlinkInputSplit[] splits = new FlinkInputSplit[tasks.size()];
for (int i = 0; i < tasks.size(); i++) {
splits[i] = new FlinkInputSplit(i, tasks.get(i));
}
return splits;
} catch (IOException e) {
throw new UncheckedIOException("Failed to process tasks iterable", e);
}
}

/**
* This returns splits for the FLIP-27 source
*/
public static List<IcebergSourceSplit> planIcebergSourceSplits(
@openinx (Member), Sep 9, 2021:
Should we add a javadoc (or replace it with a clearer name) to indicate why we need an extra planIcebergSourceSplits (compared to createInputSplits)? It doesn't seem easy to tell the difference between the names 'InputSplits' and 'IcebergSourceSplits'. I think it's used for implementing FLIP-27's SourceSplit, right?

A Member commented:
It would be good to align with createInputSplits by naming this createIcebergSourceSplits.

@stevenzwu (Contributor, Author), Sep 10, 2021:
Actually, I think we should rename createInputSplits to planFlinkInputSplit. We are not creating splits out of nowhere; both methods just discover/plan splits from the table by calling the same planTasks. I can add some javadoc on the new planIcebergSourceSplits method.

Since createInputSplits is non-public, we should be safe to rename it.

Table table, ScanContext context) {
try (CloseableIterable<CombinedScanTask> tasksIterable = planTasks(table, context)) {
List<IcebergSourceSplit> splits = Lists.newArrayList();
tasksIterable.forEach(task -> splits.add(IcebergSourceSplit.fromCombinedScanTask(task)));
return splits;
} catch (IOException e) {
throw new UncheckedIOException("Failed to process task iterable: ", e);
}
return splits;
}

private static List<CombinedScanTask> tasks(Table table, ScanContext context) {
static CloseableIterable<CombinedScanTask> planTasks(Table table, ScanContext context) {
TableScan scan = table
.newScan()
.caseSensitive(context.caseSensitive())
.project(context.project());

if (context.includeColumnStats()) {
scan = scan.includeColumnStats();
}

if (context.snapshotId() != null) {
scan = scan.useSnapshot(context.snapshotId());
}
@@ -83,10 +108,6 @@ private static List<CombinedScanTask> tasks(Table table, ScanContext context) {
}
}

try (CloseableIterable<CombinedScanTask> tasksIterable = scan.planTasks()) {
return Lists.newArrayList(tasksIterable);
} catch (IOException e) {
throw new UncheckedIOException("Failed to close table scan: " + scan, e);
}
return scan.planTasks();
}
}
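As a rough usage sketch for the planner discussed above (not part of this PR), the FLIP-27 enumerator side could discover splits much like the batch createInputSplits path shown earlier. FlinkSplitPlanner, planIcebergSourceSplits, IcebergSourceSplit, TableLoader, and ScanContext come from the codebase; the sketch class itself and its placement in the org.apache.iceberg.flink.source package are assumptions.

// Hypothetical helper, placed in the planner's package so it can reuse ScanContext directly.
package org.apache.iceberg.flink.source;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.List;
import org.apache.iceberg.Table;
import org.apache.iceberg.flink.TableLoader;
import org.apache.iceberg.flink.source.split.IcebergSourceSplit;

public class SplitPlanningSketch {
  private SplitPlanningSketch() {
  }

  // Mirrors the batch createInputSplits path shown earlier in this PR, but produces
  // FLIP-27 IcebergSourceSplits for a split enumerator instead of FlinkInputSplits.
  public static List<IcebergSourceSplit> discoverSplits(TableLoader tableLoader, ScanContext context) {
    tableLoader.open();
    try (TableLoader loader = tableLoader) {
      Table table = loader.loadTable();
      return FlinkSplitPlanner.planIcebergSourceSplits(table, context);
    } catch (IOException e) {
      throw new UncheckedIOException("Failed to plan Iceberg source splits", e);
    }
  }
}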
95 changes: 95 additions & 0 deletions flink/src/main/java/org/apache/iceberg/flink/source/Position.java
@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.flink.source;

import java.io.Serializable;
import java.util.Objects;
import org.apache.flink.annotation.Internal;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;

/**
* A mutable class that defines the read position
* <ul>
* <li>file offset in the list of files in a {@link CombinedScanTask}</li>
* <li>record offset within a file</li>
* </ul>
*/
@Internal
public class Position implements Serializable {

private static final long serialVersionUID = 1L;

private int fileOffset;
private long recordOffset;

public Position(int fileOffset, long recordOffset) {
this.fileOffset = fileOffset;
this.recordOffset = recordOffset;
}

void advanceFile() {
this.fileOffset += 1;
this.recordOffset = 0L;
}

void advanceRecord() {
this.recordOffset += 1L;
}

public void update(int newFileOffset, long newRecordOffset) {
this.fileOffset = newFileOffset;
this.recordOffset = newRecordOffset;
}

public int fileOffset() {
return fileOffset;
}

public long recordOffset() {
return recordOffset;
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final Position that = (Position) o;
return Objects.equals(fileOffset, that.fileOffset) &&
Objects.equals(recordOffset, that.recordOffset);
}

@Override
public int hashCode() {
return Objects.hash(fileOffset, recordOffset);
}

@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("fileOffset", fileOffset)
.add("recordOffset", recordOffset)
.toString();
}
}