diff --git a/velox/common/base/Counters.cpp b/velox/common/base/Counters.cpp index ba7eefc9a08b..ef7cfdf815c3 100644 --- a/velox/common/base/Counters.cpp +++ b/velox/common/base/Counters.cpp @@ -586,6 +586,42 @@ void registerVeloxMetrics() { // The number of data size exchange requests. DEFINE_METRIC(kMetricExchangeDataSizeCount, facebook::velox::StatType::COUNT); + /// ================== Index Lookup Counters ================= + // The distribution of index lookup result raw bytes in range of [0, 128MB] + // with 128 buckets. It is configured to report the capacity at P50, P90, P99, + // and P100 percentiles. + DEFINE_HISTOGRAM_METRIC( + kMetricIndexLookupResultRawBytes, + 1L << 20, + 0, + 128L << 20, + 50, + 90, + 99, + 100); + + // The distribution of index lookup result bytes in range of [0, 128MB] with + // 128 buckets. It is configured to report the capacity at P50, P90, P99, and + // P100 percentiles. + DEFINE_HISTOGRAM_METRIC( + kMetricIndexLookupResultBytes, 1L << 20, 0, 128L << 20, 50, 90, 99, 100); + + // The time distribution of index lookup time in range of [0, 16s] with 512 + // buckets and reports P50, P90, P99, and P100. + DEFINE_HISTOGRAM_METRIC( + kMetricIndexLookupTimeMs, 32, 0, 16L << 10, 50, 90, 99, 100); + + // The time distribution of index lookup wait time in range of [0, 16s] with + // 512 buckets and reports P50, P90, P99, and P100. + DEFINE_HISTOGRAM_METRIC( + kMetricIndexLookupWaitTimeMs, 32, 0, 16L << 10, 50, 90, 99, 100); + + /// ================== Table Scan Counters ================= + // The time distribution of table scan batch processing time in range of [0, + // 16s] with 512 buckets and reports P50, P90, P99, and P100. 
+ DEFINE_HISTOGRAM_METRIC( + kMetricTableScanBatchProcessTimeMs, 32, 0, 16L << 10, 50, 90, 99, 100); + /// ================== Storage Counters ================= // The time distribution of storage IO throttled duration in range of [0, 30s] diff --git a/velox/common/base/Counters.h b/velox/common/base/Counters.h index e16b04a57ff5..bf48992f95df 100644 --- a/velox/common/base/Counters.h +++ b/velox/common/base/Counters.h @@ -360,4 +360,19 @@ constexpr folly::StringPiece kMetricStorageGlobalThrottled{ constexpr folly::StringPiece kMetricStorageNetworkThrottled{ "velox.storage_network_throttled_count"}; + +constexpr folly::StringPiece kMetricIndexLookupResultRawBytes{ + "velox.index_lookup_result_raw_bytes"}; + +constexpr folly::StringPiece kMetricIndexLookupResultBytes{ + "velox.index_lookup_result_bytes"}; + +constexpr folly::StringPiece kMetricIndexLookupTimeMs{ + "velox.index_lookup_time_ms"}; + +constexpr folly::StringPiece kMetricIndexLookupWaitTimeMs{ + "velox.index_lookup_wait_time_ms"}; + +constexpr folly::StringPiece kMetricTableScanBatchProcessTimeMs{ + "velox.table_scan_batch_process_time_ms"}; } // namespace facebook::velox diff --git a/velox/common/base/RuntimeMetrics.cpp b/velox/common/base/RuntimeMetrics.cpp index 70355f118cce..691eb9f4a403 100644 --- a/velox/common/base/RuntimeMetrics.cpp +++ b/velox/common/base/RuntimeMetrics.cpp @@ -53,18 +53,20 @@ void RuntimeMetric::printMetric(std::stringstream& stream) const { case RuntimeCounter::Unit::kNanos: stream << " sum: " << succinctNanos(sum) << ", count: " << count << ", min: " << succinctNanos(min) - << ", max: " << succinctNanos(max); + << ", max: " << succinctNanos(max) + << ", avg: " << succinctNanos(count == 0 ? 0 : sum / count); break; case RuntimeCounter::Unit::kBytes: stream << " sum: " << succinctBytes(sum) << ", count: " << count << ", min: " << succinctBytes(min) - << ", max: " << succinctBytes(max); + << ", max: " << succinctBytes(max) + << ", avg: " << succinctBytes(count == 0 ? 
0 : sum / count); break; case RuntimeCounter::Unit::kNone: [[fallthrough]]; default: stream << " sum: " << sum << ", count: " << count << ", min: " << min - << ", max: " << max; + << ", max: " << max << ", avg: " << (count == 0 ? 0 : sum / count); } } @@ -72,25 +74,32 @@ std::string RuntimeMetric::toString() const { switch (unit) { case RuntimeCounter::Unit::kNanos: return fmt::format( - "sum:{}, count:{}, min:{}, max:{}", + "sum:{}, count:{}, min:{}, max:{}, avg: {}", succinctNanos(sum), count, succinctNanos(min), - succinctNanos(max)); + succinctNanos(max), + succinctNanos(count == 0 ? 0 : sum / count)); break; case RuntimeCounter::Unit::kBytes: return fmt::format( - "sum:{}, count:{}, min:{}, max:{}", + "sum:{}, count:{}, min:{}, max:{}, avg: {}", succinctBytes(sum), count, succinctBytes(min), - succinctBytes(max)); + succinctBytes(max), + succinctBytes(count == 0 ? 0 : sum / count)); break; case RuntimeCounter::Unit::kNone: [[fallthrough]]; default: return fmt::format( - "sum:{}, count:{}, min:{}, max:{}", sum, count, min, max); + "sum:{}, count:{}, min:{}, max:{}, avg: {}", + sum, + count, + min, + max, + count == 0 ? 
0 : sum / count); } } diff --git a/velox/common/base/tests/RuntimeMetricsTest.cpp b/velox/common/base/tests/RuntimeMetricsTest.cpp index 86c0585e13e0..7180f682a80b 100644 --- a/velox/common/base/tests/RuntimeMetricsTest.cpp +++ b/velox/common/base/tests/RuntimeMetricsTest.cpp @@ -49,11 +49,12 @@ TEST_F(RuntimeMetricsTest, basic) { ASSERT_EQ( fmt::format( - "sum:{}, count:{}, min:{}, max:{}", + "sum:{}, count:{}, min:{}, max:{}, avg: {}", rm1.sum, rm1.count, rm1.min, - rm1.max), + rm1.max, + rm1.sum / rm1.count), rm1.toString()); RuntimeMetric rm2; @@ -74,11 +75,13 @@ TEST_F(RuntimeMetricsTest, basic) { RuntimeMetric byteRm(RuntimeCounter::Unit::kBytes); byteRm.addValue(5); - ASSERT_EQ(byteRm.toString(), "sum:5B, count:1, min:5B, max:5B"); + ASSERT_EQ(byteRm.toString(), "sum:5B, count:1, min:5B, max:5B, avg: 5B"); RuntimeMetric timeRm(RuntimeCounter::Unit::kNanos); timeRm.addValue(2'000); - ASSERT_EQ(timeRm.toString(), "sum:2.00us, count:1, min:2.00us, max:2.00us"); + ASSERT_EQ( + timeRm.toString(), + "sum:2.00us, count:1, min:2.00us, max:2.00us, avg: 2.00us"); } } // namespace facebook::velox diff --git a/velox/docs/monitoring/metrics.rst b/velox/docs/monitoring/metrics.rst index 870b77f2e0ae..13f797d0a810 100644 --- a/velox/docs/monitoring/metrics.rst +++ b/velox/docs/monitoring/metrics.rst @@ -560,3 +560,47 @@ Hive Connector - The distribution of hive sort writer finish processing time slice in range of[0, 120s] with 60 buckets. It is configured to report latency at P50, P90, P99, and P100 percentiles. + +Index Join +---------- + +.. list-table:: + :widths: 40 10 50 + :header-rows: 1 + + * - Metric Name + - Type + - Description + * - index_lookup_time_ms + - Histogram + - The time distribution of index lookup time in range of [0, 16s] with 512 + buckets and reports P50, P90, P99, and P100. + * - index_lookup_wait_time_ms + - Histogram + - The time distribution of index lookup wait time in range of [0, 16s] with 512 + buckets and reports P50, P90, P99, and P100. 
+ * - index_lookup_result_raw_bytes + - Histogram + - The distribution of index lookup result raw bytes in range of [0, 128MB] + with 128 buckets. It is configured to report the capacity at P50, P90, P99, + and P100 percentiles. + * - index_lookup_result_bytes + - Histogram + - The distribution of index lookup result bytes in range of [0, 128MB] with + 128 buckets. It is configured to report the capacity at P50, P90, P99, and + P100 percentiles. + +Table Scan +---------- + +.. list-table:: + :widths: 40 10 50 + :header-rows: 1 + + * - Metric Name + - Type + - Description + * - table_scan_batch_process_time_ms + - Histogram + - The time distribution of table scan batch processing time in range of [0, + 16s] with 512 buckets and reports P50, P90, P99, and P100. diff --git a/velox/docs/monitoring/stats.rst b/velox/docs/monitoring/stats.rst index e9951002ead7..821162ad4b4c 100644 --- a/velox/docs/monitoring/stats.rst +++ b/velox/docs/monitoring/stats.rst @@ -142,12 +142,32 @@ These stats are reported only by IndexLookupJoin operator * - Stats - Unit - Description - * - lookupWallNanos + * - connectorLookupWallNanos - nanos - - The walltime in nanoseconds that the index connector do the lookup. - * - lookupCpuNanos + - The end-to-end walltime in nanoseconds that the index connector does the lookup. + * - clientlookupWaitWallNanos - nanos - - The cpu time in nanoseconds that the index connector do the lookup. + - The walltime in nanoseconds that the storage client waits for the lookup from + remote storage. + * - connectorResultPrepareCpuNanos + - nanos + - The cpu time in nanoseconds that the index connector processes the response from the + storage client for followup processing by the index join operator. + * - clientRequestProcessCpuNanos + - nanos + - The cpu time in nanoseconds that the storage client processes the request for remote + storage lookup such as encoding the lookup input data into a remote storage request. 
+ * - clientResultProcessCpuNanos + - nanos + - The cpu time in nanoseconds that the storage client process response from remote + storage lookup such as decoding the response data into velox vectors. + * - clientLookupResultRawSize + - bytes + - The byte size of the raw result received from the remote storage lookup. + * - clientLookupResultSize + - bytes + - The byte size of the result data in velox vectors that are decoded from the raw data + received from the remote storage lookup. Spilling -------- diff --git a/velox/exec/IndexLookupJoin.cpp b/velox/exec/IndexLookupJoin.cpp index a762567e5957..4ba9c1ab6b59 100644 --- a/velox/exec/IndexLookupJoin.cpp +++ b/velox/exec/IndexLookupJoin.cpp @@ -779,14 +779,15 @@ void IndexLookupJoin::recordConnectorStats() { lockedStats->runtimeStats.erase(name); lockedStats->runtimeStats.emplace(name, std::move(value)); } - if (connectorStats.count(kConnectorLookupCpuTime) != 0) { - VELOX_CHECK_EQ( - connectorStats[kConnectorLookupCpuTime].count, - connectorStats[kConnectorLookupWallTime].count); + if (connectorStats.count(kConnectorLookupWallTime) != 0) { const CpuWallTiming backgroundTiming{ - static_cast(connectorStats[kConnectorLookupCpuTime].count), + static_cast(connectorStats[kConnectorLookupWallTime].count), static_cast(connectorStats[kConnectorLookupWallTime].sum), - static_cast(connectorStats[kConnectorLookupCpuTime].sum)}; + // NOTE: this might not be accurate as it doesn't include the time spent + // inside the index storage client. 
+ static_cast(connectorStats[kConnectorResultPrepareTime].sum) + + connectorStats[kClientRequestProcessTime].sum + + connectorStats[kClientResultProcessTime].sum}; lockedStats->backgroundTiming.clear(); lockedStats->backgroundTiming.add(backgroundTiming); } diff --git a/velox/exec/IndexLookupJoin.h b/velox/exec/IndexLookupJoin.h index d7334b5cc68c..4f9940c8126f 100644 --- a/velox/exec/IndexLookupJoin.h +++ b/velox/exec/IndexLookupJoin.h @@ -42,10 +42,35 @@ class IndexLookupJoin : public Operator { void close() override; /// Defines lookup runtime stats. - /// The walltime time that the index connector do the lookup. - static inline const std::string kConnectorLookupWallTime{"lookupWallNanos"}; - /// The cpu time that the index connector do the lookup. - static inline const std::string kConnectorLookupCpuTime{"lookupCpuNanos"}; + /// The end-to-end walltime in nanoseconds that the index connector does the + /// lookup. + static inline const std::string kConnectorLookupWallTime{ + "connectorLookupWallNanos"}; + /// The cpu time in nanoseconds that the index connector processes the response + /// from the storage client for followup processing by the index join operator. + static inline const std::string kConnectorResultPrepareTime{ + "connectorResultPrepareCpuNanos"}; + /// The cpu time in nanoseconds that the storage client processes the request + /// for remote storage lookup such as encoding the lookup input data into a + /// remote storage request. + static inline const std::string kClientRequestProcessTime{ + "clientRequestProcessCpuNanos"}; + /// The walltime in nanoseconds that the storage client waits for the lookup + /// from remote storage. + static inline const std::string kClientLookupWaitWallTime{ + "clientlookupWaitWallNanos"}; + /// The cpu time in nanoseconds that the storage client processes the response + /// from remote storage lookup such as decoding the response data into velox + /// vectors. 
+ static inline const std::string kClientResultProcessTime{ + "clientResultProcessCpuNanos"}; + /// The byte size of the raw result received from the remote storage lookup. + static inline const std::string kClientLookupResultRawSize{ + "clientLookupResultRawSize"}; + /// The byte size of the result data in velox vectors that are decoded from + /// the raw data received from the remote storage lookup. + static inline const std::string kClientLookupResultSize{ + "clientLookupResultSize"}; private: using LookupResultIter = connector::IndexSource::LookupResultIterator; diff --git a/velox/exec/TableScan.cpp b/velox/exec/TableScan.cpp index 232786f14ce1..67d1c5ff9f8c 100644 --- a/velox/exec/TableScan.cpp +++ b/velox/exec/TableScan.cpp @@ -282,6 +282,10 @@ RowVectorPtr TableScan::getOutput() { {maxFilteringRatio_, 1.0 * data->size() / readBatchSize, 1.0 / kMaxSelectiveBatchSizeMultiplier}); + if (ioTimeUs > 0) { + RECORD_HISTOGRAM_METRIC_VALUE( + velox::kMetricTableScanBatchProcessTimeMs, ioTimeUs / 1'000); + } return data; } continue; diff --git a/velox/exec/tests/IndexLookupJoinTest.cpp b/velox/exec/tests/IndexLookupJoinTest.cpp index 65c4529ae352..967aa809c837 100644 --- a/velox/exec/tests/IndexLookupJoinTest.cpp +++ b/velox/exec/tests/IndexLookupJoinTest.cpp @@ -1817,13 +1817,22 @@ DEBUG_ONLY_TEST_P(IndexLookupJoinTest, runtimeStats) { numProbeBatches); ASSERT_GT(runtimeStats.at(IndexLookupJoin::kConnectorLookupWallTime).sum, 0); ASSERT_EQ( - runtimeStats.at(IndexLookupJoin::kConnectorLookupCpuTime).count, + runtimeStats.at(IndexLookupJoin::kClientLookupWaitWallTime).count, numProbeBatches); - ASSERT_GT(runtimeStats.at(IndexLookupJoin::kConnectorLookupCpuTime).sum, 0); + ASSERT_GT(runtimeStats.at(IndexLookupJoin::kClientLookupWaitWallTime).sum, 0); + ASSERT_EQ( + runtimeStats.at(IndexLookupJoin::kConnectorResultPrepareTime).count, + numProbeBatches); + ASSERT_GT( + runtimeStats.at(IndexLookupJoin::kConnectorResultPrepareTime).sum, 0); + 
ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientRequestProcessTime), 0); + ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientResultProcessTime), 0); + ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientLookupResultSize), 0); + ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientLookupResultRawSize), 0); ASSERT_THAT( operatorStats.toString(true, true), testing::MatchesRegex( - ".*Runtime stats.*lookupWallNanos.*lookupCpuNanos.*")); + ".*Runtime stats.*connectorLookupWallNanos:.*clientlookupWaitWallNanos.*connectorResultPrepareCpuNanos.*")); } TEST_P(IndexLookupJoinTest, joinFuzzer) { diff --git a/velox/exec/tests/PrintPlanWithStatsTest.cpp b/velox/exec/tests/PrintPlanWithStatsTest.cpp index b6dae05aa6e5..f6490da97dd6 100644 --- a/velox/exec/tests/PrintPlanWithStatsTest.cpp +++ b/velox/exec/tests/PrintPlanWithStatsTest.cpp @@ -163,28 +163,26 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) { {" -- HashJoin\\[3\\]\\[INNER c0=u_c0\\] -> c0:INTEGER, c1:BIGINT, u_c1:BIGINT"}, {" Output: 2000 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}, {" HashBuild: Input: 100 rows \\(.+\\), Output: 0 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}, - {" distinctKey0\\s+sum: 101, count: 1, min: 101, max: 101"}, + {" distinctKey0\\s+sum: 101, count: 1, min: 101, max: 101, avg: 101"}, {" hashtable.buildWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, - {" hashtable.capacity\\s+sum: 200, count: 1, min: 200, max: 200"}, - {" hashtable.numDistinct\\s+sum: 100, count: 1, min: 100, max: 100"}, - {" hashtable.numRehashes\\s+sum: 1, count: 1, min: 1, max: 1"}, + {" hashtable.capacity\\s+sum: 200, count: 1, min: 200, max: 200, avg: 200"}, + {" hashtable.numDistinct\\s+sum: 100, count: 1, min: 100, max: 100, avg: 100"}, + {" hashtable.numRehashes\\s+sum: 1, count: 1, min: 1, max: 1, avg: 1"}, {" 
queuedWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, - {" rangeKey0\\s+sum: 200, count: 1, min: 200, max: 200"}, + {" rangeKey0\\s+sum: 200, count: 1, min: 200, max: 200, avg: 200"}, {" runningAddInputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningFinishWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" HashProbe: Input: 2000 rows \\(.+\\), Output: 2000 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}, // These lines may or may not appear depending on whether the operator // gets blocked during a run. - {" blockedWaitForJoinBuildTimes sum: 1, count: 1, min: 1, max: 1", - true}, - {" blockedWaitForJoinBuildWallNanos\\s+sum: .+, count: 1, min: .+, max: .+", - true}, - {" dynamicFiltersProduced\\s+sum: 1, count: 1, min: 1, max: 1"}, - {" queuedWallNanos\\s+sum: .+, count: 1, min: .+, max: .+", - true}, // This line may or may not appear depending on how the threads - // running the Drivers are executed, this only appears if the - // HashProbe has to wait for the HashBuild construction. + {" blockedWaitForJoinBuildTimes sum: 1, count: 1, min: 1, max: 1, avg: 1"}, + {" blockedWaitForJoinBuildWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, + {" dynamicFiltersProduced\\s+sum: 1, count: 1, min: 1, max: 1, avg: 1"}, + {" queuedWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, + // This line may or may not appear depending on how the threads + // running the Drivers are executed, this only appears if the + // HashProbe has to wait for the HashBuild construction. 
{" runningAddInputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningFinishWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, @@ -192,21 +190,19 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) { {" Input: 2000 rows \\(.+\\), Raw Input: 20480 rows \\(.+\\), Output: 2000 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, Splits: 20, DynamicFilter producer plan nodes: 3, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}, {" dataSourceAddSplitWallNanos[ ]* sum: .+, count: 1, min: .+, max: .+"}, {" dataSourceReadWallNanos[ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" dynamicFiltersAccepted[ ]* sum: 1, count: 1, min: 1, max: 1"}, + {" dynamicFiltersAccepted[ ]* sum: 1, count: 1, min: 1, max: 1, avg: 1"}, {" footerBufferOverread[ ]* sum: .+, count: 1, min: .+, max: .+"}, {" ioWaitWallNanos [ ]* sum: .+, count: .+ min: .+, max: .+"}, {" maxSingleIoWaitWallNanos[ ]*sum: .+, count: 1, min: .+, max: .+"}, {" numPrefetch [ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" numRamRead [ ]* sum: 60, count: 1, min: 60, max: 60"}, + {" numRamRead [ ]* sum: 60, count: 1, min: 60, max: 60, avg: 60"}, {" numStorageRead [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" numStripes[ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B"}, + {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B, avg: 0B"}, {" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+", - true}, + {" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+"}, {" ramReadBytes [ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" readyPreloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+", - true}, + {" readyPreloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+"}, {" runningAddInputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningFinishWallNanos\\s+sum: .+, 
count: 1, min: .+, max: .+"}, {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, @@ -272,11 +268,11 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) { {" dataSourceLazyInputBytes\\s+sum: .+, count: .+, min: .+, max: .+"}, {" dataSourceLazyWallNanos\\s+sum: .+, count: .+, min: .+, max: .+"}, {" distinctKey0\\s+sum: .+, count: 1, min: .+, max: .+"}, - {" hashtable.capacity\\s+sum: (?:1273|1252), count: 1, min: (?:1273|1252), max: (?:1273|1252)"}, - {" hashtable.numDistinct\\s+sum: (?:849|835), count: 1, min: (?:849|835), max: (?:849|835)"}, - {" hashtable.numRehashes\\s+sum: 1, count: 1, min: 1, max: 1"}, - {" hashtable.numTombstones\\s+sum: 0, count: 1, min: 0, max: 0"}, - {" loadedToValueHook\\s+sum: 50000, count: 5, min: 10000, max: 10000"}, + {" hashtable.capacity\\s+sum: (?:1273|1252), count: 1, min: (?:1273|1252), max: (?:1273|1252), avg: (?:1273|1252)"}, + {" hashtable.numDistinct\\s+sum: (?:849|835), count: 1, min: (?:849|835), max: (?:849|835), avg: (?:849|835)"}, + {" hashtable.numRehashes\\s+sum: 1, count: 1, min: 1, max: 1, avg: 1"}, + {" hashtable.numTombstones\\s+sum: 0, count: 1, min: 0, max: 0, avg: 0"}, + {" loadedToValueHook\\s+sum: 50000, count: 5, min: 10000, max: 10000, avg: 10000"}, {" runningAddInputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningFinishWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, @@ -288,10 +284,10 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) { {" ioWaitWallNanos [ ]* sum: .+, count: .+ min: .+, max: .+"}, {" maxSingleIoWaitWallNanos[ ]*sum: .+, count: 1, min: .+, max: .+"}, {" numPrefetch [ ]* sum: .+, count: .+, min: .+, max: .+"}, - {" numRamRead [ ]* sum: 7, count: 1, min: 7, max: 7"}, + {" numRamRead [ ]* sum: 7, count: 1, min: 7, max: 7, avg: 7"}, {" numStorageRead [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" numStripes[ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" 
overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B"}, + {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B, avg: 0B"}, {" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+", @@ -358,10 +354,10 @@ TEST_F(PrintPlanWithStatsTest, tableWriterWithTableScan) { {" ioWaitWallNanos [ ]* sum: .+, count: .+ min: .+, max: .+"}, {" maxSingleIoWaitWallNanos[ ]*sum: .+, count: 1, min: .+, max: .+"}, {" numPrefetch [ ]* sum: .+, count: .+, min: .+, max: .+"}, - {" numRamRead [ ]* sum: 7, count: 1, min: 7, max: 7"}, + {" numRamRead [ ]* sum: 7, count: 1, min: 7, max: 7, avg: 7"}, {" numStorageRead [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" numStripes[ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B"}, + {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B, avg: 0B"}, {" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+", diff --git a/velox/exec/tests/utils/TestIndexStorageConnector.cpp b/velox/exec/tests/utils/TestIndexStorageConnector.cpp index cc6fad549753..3dfdfe04e3b2 100644 --- a/velox/exec/tests/utils/TestIndexStorageConnector.cpp +++ b/velox/exec/tests/utils/TestIndexStorageConnector.cpp @@ -208,14 +208,26 @@ void TestIndexSource::initOutputProjections() { void TestIndexSource::recordCpuTiming(const CpuWallTiming& timing) { VELOX_CHECK_EQ(timing.count, 1); std::lock_guard l(mutex_); - addOperatorRuntimeStats( - IndexLookupJoin::kConnectorLookupWallTime, - RuntimeCounter(timing.wallNanos, RuntimeCounter::Unit::kNanos), - runtimeStats_); - addOperatorRuntimeStats( - IndexLookupJoin::kConnectorLookupCpuTime, - RuntimeCounter(timing.cpuNanos, RuntimeCounter::Unit::kNanos), - runtimeStats_); + if (timing.wallNanos != 0) { + addOperatorRuntimeStats( + IndexLookupJoin::kConnectorLookupWallTime, + RuntimeCounter(timing.wallNanos, RuntimeCounter::Unit::kNanos), + runtimeStats_); + // 
This is just for testing purpose to check if the runtime stats has been + // set properly. + addOperatorRuntimeStats( + IndexLookupJoin::kClientLookupWaitWallTime, + RuntimeCounter(timing.wallNanos, RuntimeCounter::Unit::kNanos), + runtimeStats_); + } + if (timing.cpuNanos != 0) { + // This is just for testing purpose to check if the runtime stats has been + // set properly. + addOperatorRuntimeStats( + IndexLookupJoin::kConnectorResultPrepareTime, + RuntimeCounter(timing.cpuNanos, RuntimeCounter::Unit::kNanos), + runtimeStats_); + } } std::unordered_map TestIndexSource::runtimeStats() {