apache · viirya · Feb 3, 2021 · Feb 4, 2021 · Feb 4, 2021 · Feb 13, 2021
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/CustomMetric.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/CustomMetric.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector;
+
+import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.read.PartitionReader;
+import org.apache.spark.sql.connector.read.Scan;
+
+/**
+ * A custom metric. This is a logical representation of a metric reported by data sources on the
+ * read path. Data sources report the list of supported metrics to Spark through {@link Scan}
+ * during query planning. During query execution, Spark collects the metrics per partition from
+ * {@link PartitionReader} and combines the metrics from partitions into the final result. How
+ * Spark combines metrics depends on the metric type. For streaming queries, Spark collects and
+ * combines metrics into a final result per micro batch.
+ *
+ * The metrics are gathered back to the driver during query execution and then combined. The
+ * final result is shown on the physical operator in the Spark UI.
+ *
+ * @since 3.2.0
+ */
+@Evolving
+public interface CustomMetric {
+  /**
+   * The metric types that can be reported. Each type must be supported by Spark SQL internal
+   * metrics. SUM: Spark sums up the metrics from partitions to produce the final result.
+   */
+  enum MetricType {
+    SUM
+  }
+
+  /**
+   * Returns the name of this custom metric.
+   */
+  String name();
+
+  /**
+   * Returns the description of this custom metric.
+   */
+  String description();
+
+  /**
+   * Returns the type of this custom metric.
+   */
+  MetricType type();
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/LongMetric.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/LongMetric.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector;
+
+import org.apache.spark.annotation.Evolving;
+
+/**
+ * A custom metric that reports a long value.
+ *
+ * @since 3.2.0
+ */
+@Evolving
+public interface LongMetric extends CustomMetric {
+  /**
+   * Returns the value of this custom metric as a {@code long}.
+   */
+  long value();
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/PartitionReader.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/PartitionReader.java
@@ -21,6 +21,7 @@
 import java.io.IOException;
 
 import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.CustomMetric;
 
 /**
  * A partition reader returned by {@link PartitionReaderFactory#createReader(InputPartition)} or
@@ -48,4 +49,12 @@ public interface PartitionReader<T> extends Closeable {
    * Return the current record. This method should return same value until `next` is called.
    */
   T get();
+
+  /**
+   * Returns an array of custom metrics. By default it returns an empty array.
+   * @return the custom metrics of this reader; the default implementation returns none
+   */
+  default CustomMetric[] getCustomMetrics() {
+    return new CustomMetric[0];
+  }
 }
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.connector.read;
 
 import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.CustomMetric;
 import org.apache.spark.sql.connector.read.streaming.ContinuousStream;
 import org.apache.spark.sql.connector.read.streaming.MicroBatchStream;
 import org.apache.spark.sql.types.StructType;
@@ -102,4 +103,13 @@ default MicroBatchStream toMicroBatchStream(String checkpointLocation) {
   default ContinuousStream toContinuousStream(String checkpointLocation) {
     throw new UnsupportedOperationException(description() + ": Continuous scan are not supported");
   }
+
+  /**
+   * Returns an array of supported custom metrics with name and description.
+   * By default it returns an empty array.
+   * @return the custom metrics this scan supports; the default implementation returns none
+   */
+  default CustomMetric[] supportedCustomMetrics() {
+    return new CustomMetric[0];
+  }
 }