apache · szehon-ho · Mar 29, 2021
diff --git a/core/src/main/java/org/apache/iceberg/ManifestReader.java b/core/src/main/java/org/apache/iceberg/ManifestReader.java
@@ -44,6 +44,7 @@
 import org.apache.iceberg.types.Types;
 
 import static org.apache.iceberg.expressions.Expressions.alwaysTrue;
+import static org.apache.iceberg.types.Types.NestedField.required;
 
 /**
  * Base reader for data and delete manifest files.
@@ -193,6 +194,11 @@ private CloseableIterable<ManifestEntry<F>> open(Schema projection) {
 
     List<Types.NestedField> fields = Lists.newArrayList();
     fields.addAll(projection.asStruct().fields());
+    if (fields.isEmpty()) {
+      // For aggregation on Metadata "Entries" Tables, Spark passes an empty projection.
+      // This hits issues in BuildAvroProjection as 'data_files' is a 'required' field.
+      fields.add(required(DataFile.PARTITION_ID, DataFile.PARTITION_NAME, spec.partitionType()));
+    }
     fields.add(MetadataColumns.ROW_POSITION);
 
     switch (format) {

diff --git a/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceTablesBase.java b/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceTablesBase.java
@@ -222,6 +222,30 @@ public void testCountEntriesTable() {
         expectedEntryCount, spark.read().format("iceberg").load(loadLocation(tableIdentifier, "all_entries")).count());
   }
 
+  @Test
+  public void testCountEntriesPartitionedTable() {
+    // The table under test is partitioned by identity("id").
+    TableIdentifier ident = TableIdentifier.of("db", "count_entries_partitioned_test");
+    PartitionSpec idSpec = PartitionSpec.builderFor(SCHEMA).identity("id").build();
+    createTable(ident, SCHEMA, idSpec);
+
+    // Append a single record; the assertions below expect exactly one entry.
+    List<SimpleRecord> rows = Lists.newArrayList(new SimpleRecord(1, "1"));
+    Dataset<Row> df = spark.createDataFrame(rows, SimpleRecord.class).select("id", "data");
+    df.write().format("iceberg").mode("append")
+        .save(loadLocation(ident));
+
+    final int expectedEntryCount = 1;
+    String message = "Count should return " + expectedEntryCount;
+
+    // Run count() against each entries metadata table in turn: "entries" first,
+    // then "all_entries".
+    for (String metadataTable : new String[] {"entries", "all_entries"}) {
+      Dataset<Row> entries = spark.read().format("iceberg").load(loadLocation(ident, metadataTable));
+      Assert.assertEquals(message, expectedEntryCount, entries.count());
+    }
+  }
+
   @Test
   public void testFilesTable() throws Exception {
     TableIdentifier tableIdentifier = TableIdentifier.of("db", "files_test");