Spark-3.5: Add spark action to compute partition stats #12450
ComputePartitionStats.java (new file):
```java
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.actions;

import org.apache.iceberg.PartitionStatisticsFile;

/**
 * An action that computes and writes the partition statistics of an Iceberg table. Current
 * snapshot is used by default.
 */
public interface ComputePartitionStats
    extends Action<ComputePartitionStats, ComputePartitionStats.Result> {

  /**
   * Choose the table snapshot to compute partition stats.
   *
   * @param snapshotId long ID of the snapshot for which stats need to be computed
```
Contributor: nit (suggested change): and remove it from the class javadoc, that way it will be in sync with

Member (Author): It was like that before, but I got an explicit comment from @nastra to move that away from the snapshot API. More info: #12450 (comment)
```java
   * @return this for method chaining
   */
  ComputePartitionStats snapshot(long snapshotId);
```
Contributor: Do we need an API to force the computation, in cases when the existing one is corrupted?

Member (Author): The user can unregister the stats from the table using `table.updatePartitionStatistics().removePartitionStatistics(snapshotId).commit();`, so they can force a recompute when the compute API is called again.

Contributor: As an end user invoking a SQL procedure, I would expect this to be available in the procedure, instead of having to use the Java API to achieve it.

Member (Author): The Spark action is similar to the API, so users can do it themselves (unregister and call again). For the call procedure, I will add an option to force refresh.

Contributor: I think the action is the delegate for the procedure. The procedure does input validation and converts the result (from the action's execution) to an internal row.

Member (Author): I don't think it is a strict guideline. Procedures are just a way to provide functionality from SQL; they can combine one or more APIs. Added the force compute. I agree that SQL users need a way to clear stats for a forced compute in case of corruption (even though it is very rare) without depending on APIs. I was neutral about adding it to the Spark action, as users of the Spark action already have API access, but I want to move forward with this work, so I have added it.

Contributor: Sorry to go back and forth on this.
```java
  /** The result of partition statistics collection. */
  interface Result {

    /** Returns statistics file or null if no statistics were collected. */
    PartitionStatisticsFile statisticsFile();
  }
}
```
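For reference, a minimal usage sketch of the interface above; the `action` handle would come from an engine-specific implementation such as the Spark action later in this diff.

```java
import org.apache.iceberg.PartitionStatisticsFile;
import org.apache.iceberg.actions.ComputePartitionStats;

class ComputePartitionStatsUsage {
  static void run(ComputePartitionStats action, long snapshotId) {
    // Select a specific snapshot; without snapshot(...), the current one is used.
    ComputePartitionStats.Result result = action.snapshot(snapshotId).execute();

    PartitionStatisticsFile statsFile = result.statisticsFile();
    if (statsFile == null) {
      return; // nothing was collected, e.g. the table has no snapshot
    }

    System.out.printf(
        "Partition stats for snapshot %d written to %s%n",
        statsFile.snapshotId(), statsFile.path());
  }
}
```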
BaseComputePartitionStats.java (new file):
```java
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.actions;

import javax.annotation.Nullable;
import org.apache.iceberg.PartitionStatisticsFile;
import org.immutables.value.Value;

@Value.Enclosing
@SuppressWarnings("ImmutablesStyle")
@Value.Style(
    typeImmutableEnclosing = "ImmutableComputePartitionStats",
    visibilityString = "PUBLIC",
    builderVisibilityString = "PUBLIC")
interface BaseComputePartitionStats extends ComputePartitionStats {

  @Value.Immutable
  interface Result extends ComputePartitionStats.Result {
    @Override
    @Nullable
    PartitionStatisticsFile statisticsFile();
  }
}
```
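Given the `@Value.Style` above, Immutables generates a public `ImmutableComputePartitionStats` enclosing class whose builders are used by the Spark action below, for example:

```java
// An empty result: the @Nullable statisticsFile simply stays unset.
ComputePartitionStats.Result empty =
    ImmutableComputePartitionStats.Result.builder().build();

// A populated result, assuming `statsFile` is a PartitionStatisticsFile in scope.
ComputePartitionStats.Result withFile =
    ImmutableComputePartitionStats.Result.builder().statisticsFile(statsFile).build();
```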
ComputePartitionStatsSparkAction.java (new file):
```java
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.spark.actions;

import java.io.IOException;
import org.apache.iceberg.PartitionStatisticsFile;
import org.apache.iceberg.PartitionStatsHandler;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.ComputePartitionStats;
import org.apache.iceberg.actions.ImmutableComputePartitionStats;
import org.apache.iceberg.exceptions.RuntimeIOException;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.spark.JobGroupInfo;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Computes the stats incrementally, starting after the latest snapshot that has a partition stats
 * file and up to the given snapshot (the current snapshot if not specified), and writes the
 * combined result into a {@link PartitionStatisticsFile} after merging the stats for the given
 * snapshot. Does a full compute if a previous statistics file does not exist. Also registers the
 * {@link PartitionStatisticsFile} in table metadata.
 */
public class ComputePartitionStatsSparkAction
    extends BaseSparkAction<ComputePartitionStatsSparkAction> implements ComputePartitionStats {

  private static final Logger LOG = LoggerFactory.getLogger(ComputePartitionStatsSparkAction.class);
  private static final Result EMPTY_RESULT =
      ImmutableComputePartitionStats.Result.builder().build();

  private final Table table;
  private Snapshot snapshot;

  ComputePartitionStatsSparkAction(SparkSession spark, Table table) {
    super(spark);
    this.table = table;
    this.snapshot = table.currentSnapshot();
  }

  @Override
  protected ComputePartitionStatsSparkAction self() {
    return this;
  }

  @Override
  public ComputePartitionStats snapshot(long newSnapshotId) {
    Snapshot newSnapshot = table.snapshot(newSnapshotId);
```
Contributor: Before fetching the table metadata, we can check whether the new snapshot ID is equal to the initial snapshot ID.

Member (Author): Can you elaborate? What do you mean by initial snapshot ID? The user can pass any snapshot ID, and stats will be computed and committed for that particular snapshot. If the user has not set the snapshot, it uses the current snapshot. This design is the same as the existing one.

Contributor: The 'initial snapshot' I referred to is obtained when constructing the action. When the newly passed snapshot is identical to this initial one, we can avoid fetching the Iceberg metadata file - a small optimization :)
```java
    Preconditions.checkArgument(newSnapshot != null, "Snapshot not found: %s", newSnapshotId);
    this.snapshot = newSnapshot;
    return this;
  }
```
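A sketch of the short-circuit the reviewer suggests above, skipping the metadata lookup when the requested snapshot is already the selected one (not part of this PR):

```java
@Override
public ComputePartitionStats snapshot(long newSnapshotId) {
  // The constructor already loaded the current snapshot, so a matching ID
  // needs no table.snapshot(...) metadata lookup.
  if (snapshot != null && snapshot.snapshotId() == newSnapshotId) {
    return this;
  }

  Snapshot newSnapshot = table.snapshot(newSnapshotId);
  Preconditions.checkArgument(newSnapshot != null, "Snapshot not found: %s", newSnapshotId);
  this.snapshot = newSnapshot;
  return this;
}
```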
```java
  @Override
  public Result execute() {
    if (snapshot == null) {
      LOG.info("No snapshot to compute partition stats for table {}", table.name());
      return EMPTY_RESULT;
    }

    JobGroupInfo info = newJobGroupInfo("COMPUTE-PARTITION-STATS", jobDesc());
    return withJobGroupInfo(info, this::doExecute);
  }

  private Result doExecute() {
    LOG.info("Computing partition stats for {} (snapshot {})", table.name(), snapshot.snapshotId());
```
Contributor: nit (suggested change)

Contributor: I think I'd prefer it without `=`, and we can probably get away from the nested parentheses in a log statement. "Computing partition stats for table {} snapshot {}" is sufficient info imo.
```java
    PartitionStatisticsFile statisticsFile;
    try {
      statisticsFile = PartitionStatsHandler.computeAndWriteStatsFile(table, snapshot.snapshotId());
```
Member (Author): Using the local algorithm instead of the distributed algorithm: we did a POC and JMH benchmarks and found that the serialization cost is high for the distributed approach. Hence, we went with the local algorithm. More info: #9437 (comment)
```java
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }

    if (statisticsFile == null) {
      return EMPTY_RESULT;
```
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens when you truncate a table, do we return a empty stats file or null? Also add a test
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean truncate a whole table? Is there a quick Spark API to do that? Since, Iceberg uses DELETE FROM, actual files are not deleted during truncate. Stats will still be computed with delete counters. It will be similar to existing delete tests.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens after a major compaction?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are testcase at API level for compaction. Didn't add the same test case here as these are just wrapper and core functionality already tests this. Compaction, writes new stats and data file count changes per partition after compaction. Rewrite manifest also has test covered. check the core API PR for test coverage: #12629 |
```java
    }

    table.updatePartitionStatistics().setPartitionStatistics(statisticsFile).commit();
```
Contributor: If the statistics for a particular snapshot were already computed, and the user triggered the action a second time, what is the behavior?

Member (Author): Currently, it reads the stats file (of the same snapshot) and writes it into a new stats file without recomputing from manifests. I will change it to a no-op by returning the same file, along with adding info logs that the stats file already exists for the snapshot. The user can unregister the stats from the table using `table.updatePartitionStatistics().removePartitionStatistics(snapshotId).commit();`.

Contributor: +1. For a SQL user, there is no way to figure out why the command completes faster (when returning the existing file) vs taking time (when computing a new one) without a status. Although this might not be an issue using APIs, since the user can check whether a stats file is already available using the table API.

Member (Author): Maybe we need to provide a metadata table to list the statistics files (both table and partition) in the future. I think adding a status is overkill; then users have to think about freshness etc. I will make it a no-op and print in the logs why it is a no-op.

Member (Author): Added the no-op and logs, plus a test case.
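A hedged sketch of the no-op path described above (the final implementation is not shown in this diff): before computing, look for a stats file already registered for the selected snapshot and return it as-is.

```java
// Hypothetical guard at the start of doExecute(): if stats are already
// registered for this snapshot, log and return them instead of recomputing.
PartitionStatisticsFile existing =
    table.partitionStatisticsFiles().stream()
        .filter(file -> file.snapshotId() == snapshot.snapshotId())
        .findAny()
        .orElse(null);
if (existing != null) {
  LOG.info(
      "Partition stats already exist for snapshot {} of table {}, skipping compute",
      snapshot.snapshotId(),
      table.name());
  return ImmutableComputePartitionStats.Result.builder().statisticsFile(existing).build();
}
```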
```java
    return ImmutableComputePartitionStats.Result.builder().statisticsFile(statisticsFile).build();
  }

  private String jobDesc() {
    return String.format(
        "Computing partition stats for %s (snapshot=%s)", table.name(), snapshot.snapshotId());
  }
}
```
Contributor: I'm late to this PR, so my apologies if this has already been discussed, but have we considered just including the ability to compute partition stats in the existing ComputeTableStatsAction? I'm a bit wary of adding too many actions, especially in this case where an external user who is using Spark has to know that there are 2 separate actions for "stats" in general. In my head something like "compute table stats" plus a partition stats option API on that seems better.

I know the actual stats files are of course different, but I'm mainly thinking from an action and API perspective, not exposing too many different concepts. The action implementation can hide the complexity of the "how" for both types of stats. The result type of the existing procedure would be both types of files. It may not be a big deal for sophisticated users who know when to use which, but I think many people will just want to generate stats, and then the sophisticated users can drill down into which ones they want and when to run them.

Of course, if we do try to add flags to the existing procedure, we have to think through the default behavior when not specified (preserving compatibility) and what additional modes we have.

Thoughts @ajantha-bhat @karuppayya @nastra @aokolnychyi @rdblue?
Member (Author): I did analyze this while adding the Spark action.

First, the naming: ComputeTableStatsProcedure clearly says it is table-level statistics. Plugging the partition-level statistics in there didn't seem logical. Also, it takes column arguments to specify which columns need table-level stats, which is currently not applicable for partition stats. Partition stats supports incremental compute and may need an option like force refresh in the call procedure. So the syntax and input arguments (one needs column names and the other needs a boolean) differ with respect to the functionality.

Lastly, like you said, they are independent stats. Having an action to independently compute them is better, I feel.

Spark CBO is only integrated with table-level stats; it doesn't have an interface to use partition-level stats. This action is mainly for the use cases mentioned in the PR description.
Contributor: Thanks @amogh-jahagirdar for bringing this up.

Though we don't need to follow other engines, I can see examples where ANALYZE TABLE is used for both table-level and partition-level stats. I think it would be good to follow a similar pattern for the Iceberg action and procedure, so that users don't have to think about gathering Iceberg stats differently than in other engines. But as highlighted, this comes with the complexity of adding conditional code.

@ajantha-bhat, I think force refresh is something that would be useful for the current table stats action as well. Also, eventually we would need to support incremental stats with the current table stats too.
Member (Author): @karuppayya, if you look at Spark, Hive, and Trino, they have unified stats in one compute.

In Iceberg, table statistics computes just NDV. There is no column-level stats aggregate; you need to read manifests for that. Also, partition stats should eventually include column-level stats, so that later we can aggregate them into table-level stats. @deniskuzZ has a proposal for that.

Currently, table stats is not complete, and partition stats is also very different. So I prefer keeping them separate.

In 2.0, if we unify them, we can have a single procedure and deprecate these.
Contributor: I am not sure completeness of functionality should determine this. I am mainly coming from the end-user perspective of having to use different actions for stats, which differs from most other engines/formats. But I don't have a strong opinion on this; I am fine either way. It would be good to get @aokolnychyi's thoughts here since it was raised by him earlier.
Contributor: @ajantha-bhat Yeah, my take is that stats computation should be best effort; so let's say someone configures both table and partition stats to run, and whichever one runs first fails, we just move on to the next. Of course, at the end, if there's any kind of failure, we should clearly indicate that to the user, but I think the partial-failure case you're referencing can be handled by treating all stats generation as best effort in the implementation.

I guess I'm not understanding why the only additional validation would not just be `Preconditions.checkArgument(columns.isEmpty())` in the `withOnlyPartitionStats`? Are there any validations beyond this? @RussellSpitzer

My breakdown of the pros/cons; @ajantha-bhat feel free to add stuff in case I'm missing something.

Combined action with different options:

Pros: consistent with Hive, Spark, and Trino, where column stats and partition stats are effectively collected through the same SQL. I don't really see a distinction here between the procedure and the action. I feel we should generally try to keep the action API surface area reduced, just as we do with the procedure; it's just that the procedure is SQL-oriented instead of programmatic.

Cons:

Separate actions:

Pros: the main argument I can think of for separate procedures is for people familiar with the details of the statistics and their granularities; having separate procedures is clearer since, at least as of today, they are different.

Cons: the main argument I have against separate procedures is that if we consider that in the long run the options will converge, it seems compelling to keep it combined (again, going back to my feeling of keeping as minimal an API surface area as possible). I think it'd be better to do this to begin with rather than separate them and then undo it later.

In the end, if folks feel strongly about keeping it separate, I think I'm OK; I just want to make sure we're being thoughtful about the implications.
Contributor: I think the argument here I feel strongest about is the mixing of options that are invalid, which we can only determine at runtime. IMHO, we should aim to have APIs with as few gotchas as possible. If the API can't hide the fact that there are two very different things happening under the hood, I don't think they should be combined.

So I think we could keep them in the same action, but only if we have the APIs a little better segregated. Perhaps instead of "columns" you have something like `.withTableLevelNDVs("columns").withPartitionStats()`.

These are breaking changes, I know, but they make clear that there are two different, mutually exclusive things happening.
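For illustration only, the segregated API suggested here might look like the following hypothetical interface; every name in it is an assumption, not part of this PR:

```java
import org.apache.iceberg.PartitionStatisticsFile;
import org.apache.iceberg.StatisticsFile;
import org.apache.iceberg.actions.Action;

// Hypothetical combined action that keeps the two stat types explicit and
// mutually exclusive instead of overloading a single columns argument.
interface ComputeStats extends Action<ComputeStats, ComputeStats.Result> {
  // Table-level NDV stats for the given columns.
  ComputeStats withTableLevelNDVs(String... columns);

  // Partition-level stats, incremental where a previous stats file exists.
  ComputeStats withPartitionStats();

  interface Result {
    // Either file may be null when the corresponding stats were not requested.
    StatisticsFile tableStatisticsFile();

    PartitionStatisticsFile partitionStatisticsFile();
  }
}
```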
Member (Author): Thanks @RussellSpitzer. I don't want to introduce breaking changes right now. Plus, partition stats can evolve; it can even have column-level stats per partition in the future. So, once everything is finalized, we can have a unified API if required and deprecate the existing ones. Until then, I think it is good to keep separate APIs.

I agree on

@amogh-jahagirdar: Thoughts?
Contributor: I definitely agree with this statement, but I do think it should be quite possible to hide the fact that there are 2 different things happening under the hood. I think I'm generally OK (+0.5) with moving ahead as it is laid out here, and I also want to avoid breaking changes. @RussellSpitzer @ajantha-bhat, if we want to move forward with this PR, feel free!
Member (Author): Thanks @amogh-jahagirdar and @RussellSpitzer for the discussion and for helping to conclude on this. I do understand that we have a chance to unify this in the future, once partition stats are mature, by providing a new unified action (to avoid breaking changes). For now, please approve the PR if you are OK with it; we will merge it today. I just rebased it.