116 changes: 116 additions & 0 deletions api/src/main/java/org/apache/iceberg/ContentFile.java
@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg;

import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;

/**
* Superinterface of {@link DataFile} and {@link DeleteFile} that exposes common methods.
*
* @param <F> the concrete Java class of a ContentFile instance.
*/
public interface ContentFile<F> {
/**
* @return type of content stored in the file; one of DATA, POSITION_DELETES, or EQUALITY_DELETES
*/
FileContent content();

/**
* @return fully qualified path to the file, suitable for constructing a Hadoop Path
*/
CharSequence path();

/**
* @return format of the file
*/
FileFormat format();

/**
* @return partition for this file as a {@link StructLike}
*/
StructLike partition();

/**
* @return the number of top-level records in the file
*/
long recordCount();

/**
* @return the file size in bytes
*/
long fileSizeInBytes();

/**
* @return if collected, map from column ID to the size of the column in bytes, null otherwise
*/
Map<Integer, Long> columnSizes();

/**
* @return if collected, map from column ID to the count of its non-null values, null otherwise
*/
Map<Integer, Long> valueCounts();

/**
* @return if collected, map from column ID to its null value count, null otherwise
*/
Map<Integer, Long> nullValueCounts();

/**
* @return if collected, map from column ID to value lower bounds, null otherwise
*/
Map<Integer, ByteBuffer> lowerBounds();

/**
* @return if collected, map from column ID to value upper bounds, null otherwise
*/
Map<Integer, ByteBuffer> upperBounds();

/**
* @return metadata about how this file is encrypted, or null if the file is stored in plain
* text.
*/
ByteBuffer keyMetadata();

/**
* @return List of recommended split locations, if applicable, null otherwise.
* When available, this information is used for planning scan tasks whose boundaries
* are determined by these offsets. The returned list must be sorted in ascending order.
*/
List<Long> splitOffsets();

/**
* Copies this file. Manifest readers can reuse file instances; use
* this method to copy data when collecting files from tasks.
*
* @return a copy of this file
*/
F copy();

/**
* Copies this file without file stats. Manifest readers can reuse file instances; use
* this method to copy data without stats when collecting files.
*
* @return a copy of this file, without lower bounds, upper bounds, value counts, or null value counts
*/
F copyWithoutStats();
}
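The point of the new superinterface is that code which only needs the common metadata can accept either a data file or a delete file. A minimal sketch of such a caller (not part of this change; the class and method names are illustrative):

```java
import org.apache.iceberg.ContentFile;

public class ContentFileSummaries {
  // The accessors used here are declared on ContentFile, so this works for
  // DataFile and DeleteFile alike, with no casting.
  static String summarize(ContentFile<?> file) {
    return String.format("%s %s: %d records, %d bytes",
        file.content(), file.path(), file.recordCount(), file.fileSizeInBytes());
  }

  // Manifest readers can reuse file instances, so copy before collecting.
  // The recursive bound keeps the concrete type (DataFile or DeleteFile).
  static <F extends ContentFile<F>> F defensiveCopy(F file) {
    return file.copy();
  }
}
```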
87 changes: 3 additions & 84 deletions api/src/main/java/org/apache/iceberg/DataFile.java
@@ -19,9 +19,6 @@

package org.apache.iceberg;

import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.BinaryType;
import org.apache.iceberg.types.Types.IntegerType;
@@ -35,9 +32,9 @@
import static org.apache.iceberg.types.Types.NestedField.required;

/**
* Interface for files listed in a table manifest.
* Interface for data files listed in a table manifest.
*/
public interface DataFile {
public interface DataFile extends ContentFile<DataFile> {
// fields for adding delete data files
Types.NestedField CONTENT = optional(134, "content", IntegerType.get(),
"Contents of the file: 0=data, 1=position deletes, 2=equality deletes");
@@ -86,86 +83,8 @@ static StructType getType(StructType partitionType) {
/**
* @return the content stored in the file; one of DATA, POSITION_DELETES, or EQUALITY_DELETES
*/
@Override
default FileContent content() {
return FileContent.DATA;
}

/**
* @return fully qualified path to the file, suitable for constructing a Hadoop Path
*/
CharSequence path();

/**
* @return format of the data file
*/
FileFormat format();

/**
* @return partition data for this file as a {@link StructLike}
*/
StructLike partition();

/**
* @return the number of top-level records in the data file
*/
long recordCount();

/**
* @return the data file size in bytes
*/
long fileSizeInBytes();

/**
* @return if collected, map from column ID to the size of the column in bytes, null otherwise
*/
Map<Integer, Long> columnSizes();

/**
* @return if collected, map from column ID to the count of its non-null values, null otherwise
*/
Map<Integer, Long> valueCounts();

/**
* @return if collected, map from column ID to its null value count, null otherwise
*/
Map<Integer, Long> nullValueCounts();

/**
* @return if collected, map from column ID to value lower bounds, null otherwise
*/
Map<Integer, ByteBuffer> lowerBounds();

/**
* @return if collected, map from column ID to value upper bounds, null otherwise
*/
Map<Integer, ByteBuffer> upperBounds();

/**
* @return metadata about how this file is encrypted, or null if the file is stored in plain
* text.
*/
ByteBuffer keyMetadata();

/**
* @return List of recommended split locations, if applicable, null otherwise.
* When available, this information is used for planning scan tasks whose boundaries
* are determined by these offsets. The returned list must be sorted in ascending order.
*/
List<Long> splitOffsets();

/**
* Copies this {@link DataFile data file}. Manifest readers can reuse data file instances; use
* this method to copy data when collecting files from tasks.
*
* @return a copy of this data file
*/
DataFile copy();

/**
* Copies this {@link DataFile data file} without file stats. Manifest readers can reuse data file instances; use
* this method to copy data without stats when collecting files.
*
* @return a copy of this data file, without lower bounds, upper bounds, value counts, or null value counts
*/
DataFile copyWithoutStats();
}
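Because content() now has a default implementation returning FileContent.DATA, existing DataFile implementations compile unchanged, and callers can branch on the content type rather than on the Java type. A hedged sketch (not from this PR; the handler methods are hypothetical):

```java
import org.apache.iceberg.ContentFile;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;

class ContentDispatch {
  static void handle(ContentFile<?> file) {
    switch (file.content()) {
      case DATA:
        handleData((DataFile) file);      // DataFile's default content() is DATA
        break;
      case POSITION_DELETES:
      case EQUALITY_DELETES:
        handleDeletes((DeleteFile) file);
        break;
    }
  }

  private static void handleData(DataFile file) { /* hypothetical handler */ }

  private static void handleDeletes(DeleteFile file) { /* hypothetical handler */ }
}
```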
37 changes: 37 additions & 0 deletions api/src/main/java/org/apache/iceberg/DeleteFile.java
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg;

import java.util.List;

/**
* Interface for delete files listed in a table delete manifest.
*/
public interface DeleteFile extends ContentFile<DeleteFile> {
/**
* @return List of recommended split locations, if applicable, null otherwise.
* When available, this information is used for planning scan tasks whose boundaries
* are determined by these offsets. The returned list must be sorted in ascending order.
*/
@Override
default List<Long> splitOffsets() {
return null;
}
}
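DeleteFile inherits everything else from ContentFile and only pins splitOffsets() to null, so planning code should treat a null return as "no recommended splits" rather than assume offsets exist. A small defensive sketch (illustrative, not from this PR):

```java
import java.util.List;
import org.apache.iceberg.ContentFile;

class SplitPlanning {
  // Falls back to one split per file when no offsets were recorded, which is
  // always the case for DeleteFile unless an implementation overrides the default.
  static int splitCount(ContentFile<?> file) {
    List<Long> offsets = file.splitOffsets();
    return offsets == null || offsets.isEmpty() ? 1 : offsets.size();
  }
}
```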
api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
@@ -25,6 +25,7 @@
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.ContentFile;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.ExpressionVisitors.BoundExpressionVisitor;
@@ -40,7 +41,7 @@
* <p>
* This evaluation is inclusive: it returns true if a file may match and false if it cannot match.
* <p>
* Files are passed to {@link #eval(DataFile)}, which returns true if the file may contain matching
* Files are passed to {@link #eval(ContentFile)}, which returns true if the file may contain matching
* rows and false if the file cannot contain matching rows. Files may be skipped if and only if the
* return value of {@code eval} is false.
*/
@@ -70,7 +71,7 @@ public InclusiveMetricsEvaluator(Schema schema, Expression unbound, boolean case
* @param file a data file or delete file
* @return false if the file cannot contain rows that match the expression, true otherwise.
*/
public boolean eval(DataFile file) {
public boolean eval(ContentFile<?> file) {
// TODO: detect the case where a column is missing from the file using file's max field id.
return visitor().eval(file);
}
@@ -84,7 +85,7 @@ private class MetricsEvalVisitor extends BoundExpressionVisitor<Boolean> {
private Map<Integer, ByteBuffer> lowerBounds = null;
private Map<Integer, ByteBuffer> upperBounds = null;

private boolean eval(DataFile file) {
private boolean eval(ContentFile<?> file) {
if (file.recordCount() == 0) {
return ROWS_CANNOT_MATCH;
}
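With eval widened to ContentFile<?>, one evaluator instance can prune both data files and delete files by their column metrics. A usage sketch (the helper class is hypothetical; it uses the three-argument constructor visible in this diff, with caseSensitive assumed to be true):

```java
import java.util.List;
import java.util.stream.Collectors;
import org.apache.iceberg.ContentFile;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.InclusiveMetricsEvaluator;

class MetricsPruning {
  // Keeps only files that may contain matching rows; files for which eval
  // returns false can be skipped safely.
  static <F extends ContentFile<F>> List<F> prune(Schema schema, Expression filter, List<F> files) {
    InclusiveMetricsEvaluator evaluator = new InclusiveMetricsEvaluator(schema, filter, true);
    return files.stream()
        .filter(evaluator::eval)
        .collect(Collectors.toList());
  }
}
```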