diff --git a/velox/common/base/Counters.cpp b/velox/common/base/Counters.cpp index ba7eefc9a08b..ef7cfdf815c3 100644 --- a/velox/common/base/Counters.cpp +++ b/velox/common/base/Counters.cpp @@ -586,6 +586,42 @@ void registerVeloxMetrics() { // The number of data size exchange requests. DEFINE_METRIC(kMetricExchangeDataSizeCount, facebook::velox::StatType::COUNT); + /// ================== Index Lookup Counters ================= + // The distribution of index lookup result raw bytes in range of [0, 128MB] + // with 128 buckets. It is configured to report the capacity at P50, P90, P99, + // and P100 percentiles. + DEFINE_HISTOGRAM_METRIC( + kMetricIndexLookupResultRawBytes, + 1L << 20, + 0, + 128L << 20, + 50, + 90, + 99, + 100); + + // The distribution of index lookup result bytes in range of [0, 128MB] with + // 128 buckets. It is configured to report the capacity at P50, P90, P99, and + // P100 percentiles. + DEFINE_HISTOGRAM_METRIC( + kMetricIndexLookupResultBytes, 1L << 20, 0, 128L << 20, 50, 90, 99, 100); + + // The time distribution of index lookup time in range of [0, 16s] with 512 + // buckets and reports P50, P90, P99, and P100. + DEFINE_HISTOGRAM_METRIC( + kMetricIndexLookupTimeMs, 32, 0, 16L << 10, 50, 90, 99, 100); + + // The time distribution of index lookup wait time in range of [0, 16s] with + // 512 buckets and reports P50, P90, P99, and P100. + DEFINE_HISTOGRAM_METRIC( + kMetricIndexLookupWaitTimeMs, 32, 0, 16L << 10, 50, 90, 99, 100); + + /// ================== Table Scan Counters ================= + // The time distribution of table scan batch processing time in range of [0, + // 16s] with 512 buckets and reports P50, P90, P99, and P100. 
+ DEFINE_HISTOGRAM_METRIC( + kMetricTableScanBatchProcessTimeMs, 32, 0, 16L << 10, 50, 90, 99, 100); + /// ================== Storage Counters ================= // The time distribution of storage IO throttled duration in range of [0, 30s] diff --git a/velox/common/base/Counters.h b/velox/common/base/Counters.h index e16b04a57ff5..bf48992f95df 100644 --- a/velox/common/base/Counters.h +++ b/velox/common/base/Counters.h @@ -360,4 +360,19 @@ constexpr folly::StringPiece kMetricStorageGlobalThrottled{ constexpr folly::StringPiece kMetricStorageNetworkThrottled{ "velox.storage_network_throttled_count"}; + +constexpr folly::StringPiece kMetricIndexLookupResultRawBytes{ + "velox.index_lookup_result_raw_bytes"}; + +constexpr folly::StringPiece kMetricIndexLookupResultBytes{ + "velox.index_lookup_result_bytes"}; + +constexpr folly::StringPiece kMetricIndexLookupTimeMs{ + "velox.index_lookup_time_ms"}; + +constexpr folly::StringPiece kMetricIndexLookupWaitTimeMs{ + "velox.index_lookup_wait_time_ms"}; + +constexpr folly::StringPiece kMetricTableScanBatchProcessTimeMs{ + "velox.table_scan_batch_process_time_ms"}; } // namespace facebook::velox diff --git a/velox/common/base/RuntimeMetrics.cpp b/velox/common/base/RuntimeMetrics.cpp index 70355f118cce..691eb9f4a403 100644 --- a/velox/common/base/RuntimeMetrics.cpp +++ b/velox/common/base/RuntimeMetrics.cpp @@ -53,18 +53,20 @@ void RuntimeMetric::printMetric(std::stringstream& stream) const { case RuntimeCounter::Unit::kNanos: stream << " sum: " << succinctNanos(sum) << ", count: " << count << ", min: " << succinctNanos(min) - << ", max: " << succinctNanos(max); + << ", max: " << succinctNanos(max) + << ", avg: " << succinctNanos(count == 0 ? 0 : sum / count); break; case RuntimeCounter::Unit::kBytes: stream << " sum: " << succinctBytes(sum) << ", count: " << count << ", min: " << succinctBytes(min) - << ", max: " << succinctBytes(max); + << ", max: " << succinctBytes(max) + << ", avg: " << succinctBytes(count == 0 ? 
0 : sum / count); break; case RuntimeCounter::Unit::kNone: [[fallthrough]]; default: stream << " sum: " << sum << ", count: " << count << ", min: " << min - << ", max: " << max; + << ", max: " << max << ", avg: " << (count == 0 ? 0 : sum / count); } } @@ -72,25 +74,32 @@ std::string RuntimeMetric::toString() const { switch (unit) { case RuntimeCounter::Unit::kNanos: return fmt::format( - "sum:{}, count:{}, min:{}, max:{}", + "sum:{}, count:{}, min:{}, max:{}, avg: {}", succinctNanos(sum), count, succinctNanos(min), - succinctNanos(max)); + succinctNanos(max), + succinctNanos(count == 0 ? 0 : sum / count)); break; case RuntimeCounter::Unit::kBytes: return fmt::format( - "sum:{}, count:{}, min:{}, max:{}", + "sum:{}, count:{}, min:{}, max:{}, avg: {}", succinctBytes(sum), count, succinctBytes(min), - succinctBytes(max)); + succinctBytes(max), + succinctBytes(count == 0 ? 0 : sum / count)); break; case RuntimeCounter::Unit::kNone: [[fallthrough]]; default: return fmt::format( - "sum:{}, count:{}, min:{}, max:{}", sum, count, min, max); + "sum:{}, count:{}, min:{}, max:{}, avg: {}", + sum, + count, + min, + max, + count == 0 ? 
0 : sum / count); } } diff --git a/velox/common/base/tests/RuntimeMetricsTest.cpp b/velox/common/base/tests/RuntimeMetricsTest.cpp index 86c0585e13e0..7180f682a80b 100644 --- a/velox/common/base/tests/RuntimeMetricsTest.cpp +++ b/velox/common/base/tests/RuntimeMetricsTest.cpp @@ -49,11 +49,12 @@ TEST_F(RuntimeMetricsTest, basic) { ASSERT_EQ( fmt::format( - "sum:{}, count:{}, min:{}, max:{}", + "sum:{}, count:{}, min:{}, max:{}, avg: {}", rm1.sum, rm1.count, rm1.min, - rm1.max), + rm1.max, + rm1.sum / rm1.count), rm1.toString()); RuntimeMetric rm2; @@ -74,11 +75,13 @@ TEST_F(RuntimeMetricsTest, basic) { RuntimeMetric byteRm(RuntimeCounter::Unit::kBytes); byteRm.addValue(5); - ASSERT_EQ(byteRm.toString(), "sum:5B, count:1, min:5B, max:5B"); + ASSERT_EQ(byteRm.toString(), "sum:5B, count:1, min:5B, max:5B, avg: 5B"); RuntimeMetric timeRm(RuntimeCounter::Unit::kNanos); timeRm.addValue(2'000); - ASSERT_EQ(timeRm.toString(), "sum:2.00us, count:1, min:2.00us, max:2.00us"); + ASSERT_EQ( + timeRm.toString(), + "sum:2.00us, count:1, min:2.00us, max:2.00us, avg: 2.00us"); } } // namespace facebook::velox diff --git a/velox/docs/monitoring/metrics.rst b/velox/docs/monitoring/metrics.rst index 870b77f2e0ae..13f797d0a810 100644 --- a/velox/docs/monitoring/metrics.rst +++ b/velox/docs/monitoring/metrics.rst @@ -560,3 +560,47 @@ Hive Connector - The distribution of hive sort writer finish processing time slice in range of[0, 120s] with 60 buckets. It is configured to report latency at P50, P90, P99, and P100 percentiles. + +Index Join +---------- + +.. list-table:: + :widths: 40 10 50 + :header-rows: 1 + + * - Metric Name + - Type + - Description + * - index_lookup_time_ms + - Histogram + - The time distribution of index lookup time in range of [0, 16s] with 512 + buckets and reports P50, P90, P99, and P100. + * - index_lookup_wait_time_ms + - Histogram + - The time distribution of index lookup wait time in range of [0, 16s] with 512 + buckets and reports P50, P90, P99, and P100. 
+ * - index_lookup_result_raw_bytes + - Histogram + - The distribution of index lookup result raw bytes in range of [0, 128MB] + with 128 buckets. It is configured to report the capacity at P50, P90, P99, + and P100 percentiles. + * - index_lookup_result_bytes + - Histogram + - The distribution of index lookup result bytes in range of [0, 128MB] with + 128 buckets. It is configured to report the capacity at P50, P90, P99, and + P100 percentiles. + +Table Scan +---------- + +.. list-table:: + :widths: 40 10 50 + :header-rows: 1 + + * - Metric Name + - Type + - Description + * - table_scan_batch_process_time_ms + - Histogram + - The time distribution of table scan batch processing time in range of [0, + 16s] with 512 buckets and reports P50, P90, P99, and P100. diff --git a/velox/docs/monitoring/stats.rst b/velox/docs/monitoring/stats.rst index e9951002ead7..821162ad4b4c 100644 --- a/velox/docs/monitoring/stats.rst +++ b/velox/docs/monitoring/stats.rst @@ -142,12 +142,32 @@ These stats are reported only by IndexLookupJoin operator * - Stats - Unit - Description - * - lookupWallNanos + * - connectorLookupWallNanos - nanos - - The walltime in nanoseconds that the index connector do the lookup. - * - lookupCpuNanos + - The end-to-end walltime in nanoseconds that the index connector does the lookup. + * - clientlookupWaitWallNanos - nanos - - The cpu time in nanoseconds that the index connector do the lookup. + - The walltime in nanoseconds that the storage client waits for the lookup from + remote storage. + * - connectorResultPrepareCpuNanos + - nanos + - The cpu time in nanoseconds that the index connector processes the response from the + storage client for followup processing by the index join operator. + * - clientRequestProcessCpuNanos + - nanos + - The cpu time in nanoseconds that the storage client processes the request for remote + storage lookup such as encoding the lookup input data into a remote storage request. 
+ * - clientResultProcessCpuNanos + - nanos + - The cpu time in nanoseconds that the storage client process response from remote + storage lookup such as decoding the response data into velox vectors. + * - clientLookupResultRawSize + - bytes + - The byte size of the raw result received from the remote storage lookup. + * - clientLookupResultSize + - bytes + - The byte size of the result data in velox vectors that are decoded from the raw data + received from the remote storage lookup. Spilling -------- diff --git a/velox/exec/IndexLookupJoin.cpp b/velox/exec/IndexLookupJoin.cpp index a762567e5957..4ba9c1ab6b59 100644 --- a/velox/exec/IndexLookupJoin.cpp +++ b/velox/exec/IndexLookupJoin.cpp @@ -779,14 +779,15 @@ void IndexLookupJoin::recordConnectorStats() { lockedStats->runtimeStats.erase(name); lockedStats->runtimeStats.emplace(name, std::move(value)); } - if (connectorStats.count(kConnectorLookupCpuTime) != 0) { - VELOX_CHECK_EQ( - connectorStats[kConnectorLookupCpuTime].count, - connectorStats[kConnectorLookupWallTime].count); + if (connectorStats.count(kConnectorLookupWallTime) != 0) { const CpuWallTiming backgroundTiming{ - static_cast(connectorStats[kConnectorLookupCpuTime].count), + static_cast(connectorStats[kConnectorLookupWallTime].count), static_cast(connectorStats[kConnectorLookupWallTime].sum), - static_cast(connectorStats[kConnectorLookupCpuTime].sum)}; + // NOTE: this might not be accurate as it doesn't include the time spent + // inside the index storage client. 
+ static_cast(connectorStats[kConnectorResultPrepareTime].sum) + + connectorStats[kClientRequestProcessTime].sum + + connectorStats[kClientResultProcessTime].sum}; lockedStats->backgroundTiming.clear(); lockedStats->backgroundTiming.add(backgroundTiming); } diff --git a/velox/exec/IndexLookupJoin.h b/velox/exec/IndexLookupJoin.h index d7334b5cc68c..4f9940c8126f 100644 --- a/velox/exec/IndexLookupJoin.h +++ b/velox/exec/IndexLookupJoin.h @@ -42,10 +42,35 @@ class IndexLookupJoin : public Operator { void close() override; /// Defines lookup runtime stats. - /// The walltime time that the index connector do the lookup. - static inline const std::string kConnectorLookupWallTime{"lookupWallNanos"}; - /// The cpu time that the index connector do the lookup. - static inline const std::string kConnectorLookupCpuTime{"lookupCpuNanos"}; + /// The end-to-end walltime in nanoseconds that the index connector does the + /// lookup. + static inline const std::string kConnectorLookupWallTime{ + "connectorLookupWallNanos"}; + /// The cpu time in nanoseconds that the index connector processes the response + /// from the storage client for followup processing by the index join operator. + static inline const std::string kConnectorResultPrepareTime{ + "connectorResultPrepareCpuNanos"}; + /// The cpu time in nanoseconds that the storage client processes the request + /// for remote storage lookup such as encoding the lookup input data into a + /// remote storage request. + static inline const std::string kClientRequestProcessTime{ + "clientRequestProcessCpuNanos"}; + /// The walltime in nanoseconds that the storage client waits for the lookup + /// from remote storage. + static inline const std::string kClientLookupWaitWallTime{ + "clientlookupWaitWallNanos"}; + /// The cpu time in nanoseconds that the storage client processes the response + /// from remote storage lookup such as decoding the response data into velox + /// vectors. 
+ static inline const std::string kClientResultProcessTime{ + "clientResultProcessCpuNanos"}; + /// The byte size of the raw result received from the remote storage lookup. + static inline const std::string kClientLookupResultRawSize{ + "clientLookupResultRawSize"}; + /// The byte size of the result data in velox vectors that are decoded from + /// the raw data received from the remote storage lookup. + static inline const std::string kClientLookupResultSize{ + "clientLookupResultSize"}; private: using LookupResultIter = connector::IndexSource::LookupResultIterator; diff --git a/velox/exec/TableScan.cpp b/velox/exec/TableScan.cpp index 232786f14ce1..67d1c5ff9f8c 100644 --- a/velox/exec/TableScan.cpp +++ b/velox/exec/TableScan.cpp @@ -282,6 +282,10 @@ RowVectorPtr TableScan::getOutput() { {maxFilteringRatio_, 1.0 * data->size() / readBatchSize, 1.0 / kMaxSelectiveBatchSizeMultiplier}); + if (ioTimeUs > 0) { + RECORD_HISTOGRAM_METRIC_VALUE( + velox::kMetricTableScanBatchProcessTimeMs, ioTimeUs / 1'000); + } return data; } continue; diff --git a/velox/exec/tests/IndexLookupJoinTest.cpp b/velox/exec/tests/IndexLookupJoinTest.cpp index 65c4529ae352..967aa809c837 100644 --- a/velox/exec/tests/IndexLookupJoinTest.cpp +++ b/velox/exec/tests/IndexLookupJoinTest.cpp @@ -1817,13 +1817,22 @@ DEBUG_ONLY_TEST_P(IndexLookupJoinTest, runtimeStats) { numProbeBatches); ASSERT_GT(runtimeStats.at(IndexLookupJoin::kConnectorLookupWallTime).sum, 0); ASSERT_EQ( - runtimeStats.at(IndexLookupJoin::kConnectorLookupCpuTime).count, + runtimeStats.at(IndexLookupJoin::kClientLookupWaitWallTime).count, numProbeBatches); - ASSERT_GT(runtimeStats.at(IndexLookupJoin::kConnectorLookupCpuTime).sum, 0); + ASSERT_GT(runtimeStats.at(IndexLookupJoin::kClientLookupWaitWallTime).sum, 0); + ASSERT_EQ( + runtimeStats.at(IndexLookupJoin::kConnectorResultPrepareTime).count, + numProbeBatches); + ASSERT_GT( + runtimeStats.at(IndexLookupJoin::kConnectorResultPrepareTime).sum, 0); + 
ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientRequestProcessTime), 0); + ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientResultProcessTime), 0); + ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientLookupResultSize), 0); + ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientLookupResultRawSize), 0); ASSERT_THAT( operatorStats.toString(true, true), testing::MatchesRegex( - ".*Runtime stats.*lookupWallNanos.*lookupCpuNanos.*")); + ".*Runtime stats.*connectorLookupWallNanos:.*clientlookupWaitWallNanos.*connectorResultPrepareCpuNanos.*")); } TEST_P(IndexLookupJoinTest, joinFuzzer) { diff --git a/velox/exec/tests/PrintPlanWithStatsTest.cpp b/velox/exec/tests/PrintPlanWithStatsTest.cpp index b6dae05aa6e5..f6490da97dd6 100644 --- a/velox/exec/tests/PrintPlanWithStatsTest.cpp +++ b/velox/exec/tests/PrintPlanWithStatsTest.cpp @@ -163,28 +163,26 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) { {" -- HashJoin\\[3\\]\\[INNER c0=u_c0\\] -> c0:INTEGER, c1:BIGINT, u_c1:BIGINT"}, {" Output: 2000 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}, {" HashBuild: Input: 100 rows \\(.+\\), Output: 0 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}, - {" distinctKey0\\s+sum: 101, count: 1, min: 101, max: 101"}, + {" distinctKey0\\s+sum: 101, count: 1, min: 101, max: 101, avg: 101"}, {" hashtable.buildWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, - {" hashtable.capacity\\s+sum: 200, count: 1, min: 200, max: 200"}, - {" hashtable.numDistinct\\s+sum: 100, count: 1, min: 100, max: 100"}, - {" hashtable.numRehashes\\s+sum: 1, count: 1, min: 1, max: 1"}, + {" hashtable.capacity\\s+sum: 200, count: 1, min: 200, max: 200, avg: 200"}, + {" hashtable.numDistinct\\s+sum: 100, count: 1, min: 100, max: 100, avg: 100"}, + {" hashtable.numRehashes\\s+sum: 1, count: 1, min: 1, max: 1, avg: 1"}, {" 
queuedWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, - {" rangeKey0\\s+sum: 200, count: 1, min: 200, max: 200"}, + {" rangeKey0\\s+sum: 200, count: 1, min: 200, max: 200, avg: 200"}, {" runningAddInputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningFinishWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" HashProbe: Input: 2000 rows \\(.+\\), Output: 2000 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}, // These lines may or may not appear depending on whether the operator // gets blocked during a run. - {" blockedWaitForJoinBuildTimes sum: 1, count: 1, min: 1, max: 1", - true}, - {" blockedWaitForJoinBuildWallNanos\\s+sum: .+, count: 1, min: .+, max: .+", - true}, - {" dynamicFiltersProduced\\s+sum: 1, count: 1, min: 1, max: 1"}, - {" queuedWallNanos\\s+sum: .+, count: 1, min: .+, max: .+", - true}, // This line may or may not appear depending on how the threads - // running the Drivers are executed, this only appears if the - // HashProbe has to wait for the HashBuild construction. + {" blockedWaitForJoinBuildTimes sum: 1, count: 1, min: 1, max: 1, avg: 1"}, + {" blockedWaitForJoinBuildWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, + {" dynamicFiltersProduced\\s+sum: 1, count: 1, min: 1, max: 1, avg: 1"}, + {" queuedWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, + // This line may or may not appear depending on how the threads + // running the Drivers are executed, this only appears if the + // HashProbe has to wait for the HashBuild construction. 
{" runningAddInputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningFinishWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, @@ -192,21 +190,19 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) { {" Input: 2000 rows \\(.+\\), Raw Input: 20480 rows \\(.+\\), Output: 2000 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, Splits: 20, DynamicFilter producer plan nodes: 3, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}, {" dataSourceAddSplitWallNanos[ ]* sum: .+, count: 1, min: .+, max: .+"}, {" dataSourceReadWallNanos[ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" dynamicFiltersAccepted[ ]* sum: 1, count: 1, min: 1, max: 1"}, + {" dynamicFiltersAccepted[ ]* sum: 1, count: 1, min: 1, max: 1, avg: 1"}, {" footerBufferOverread[ ]* sum: .+, count: 1, min: .+, max: .+"}, {" ioWaitWallNanos [ ]* sum: .+, count: .+ min: .+, max: .+"}, {" maxSingleIoWaitWallNanos[ ]*sum: .+, count: 1, min: .+, max: .+"}, {" numPrefetch [ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" numRamRead [ ]* sum: 60, count: 1, min: 60, max: 60"}, + {" numRamRead [ ]* sum: 60, count: 1, min: 60, max: 60, avg: 60"}, {" numStorageRead [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" numStripes[ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B"}, + {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B, avg: 0B"}, {" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+", - true}, + {" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+"}, {" ramReadBytes [ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" readyPreloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+", - true}, + {" readyPreloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+"}, {" runningAddInputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningFinishWallNanos\\s+sum: .+, 
count: 1, min: .+, max: .+"}, {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, @@ -272,11 +268,11 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) { {" dataSourceLazyInputBytes\\s+sum: .+, count: .+, min: .+, max: .+"}, {" dataSourceLazyWallNanos\\s+sum: .+, count: .+, min: .+, max: .+"}, {" distinctKey0\\s+sum: .+, count: 1, min: .+, max: .+"}, - {" hashtable.capacity\\s+sum: (?:1273|1252), count: 1, min: (?:1273|1252), max: (?:1273|1252)"}, - {" hashtable.numDistinct\\s+sum: (?:849|835), count: 1, min: (?:849|835), max: (?:849|835)"}, - {" hashtable.numRehashes\\s+sum: 1, count: 1, min: 1, max: 1"}, - {" hashtable.numTombstones\\s+sum: 0, count: 1, min: 0, max: 0"}, - {" loadedToValueHook\\s+sum: 50000, count: 5, min: 10000, max: 10000"}, + {" hashtable.capacity\\s+sum: (?:1273|1252), count: 1, min: (?:1273|1252), max: (?:1273|1252), avg: (?:1273|1252)"}, + {" hashtable.numDistinct\\s+sum: (?:849|835), count: 1, min: (?:849|835), max: (?:849|835), avg: (?:849|835)"}, + {" hashtable.numRehashes\\s+sum: 1, count: 1, min: 1, max: 1, avg: 1"}, + {" hashtable.numTombstones\\s+sum: 0, count: 1, min: 0, max: 0, avg: 0"}, + {" loadedToValueHook\\s+sum: 50000, count: 5, min: 10000, max: 10000, avg: 10000"}, {" runningAddInputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningFinishWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, @@ -288,10 +284,10 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) { {" ioWaitWallNanos [ ]* sum: .+, count: .+ min: .+, max: .+"}, {" maxSingleIoWaitWallNanos[ ]*sum: .+, count: 1, min: .+, max: .+"}, {" numPrefetch [ ]* sum: .+, count: .+, min: .+, max: .+"}, - {" numRamRead [ ]* sum: 7, count: 1, min: 7, max: 7"}, + {" numRamRead [ ]* sum: 7, count: 1, min: 7, max: 7, avg: 7"}, {" numStorageRead [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" numStripes[ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" 
overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B"}, + {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B, avg: 0B"}, {" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+", @@ -358,10 +354,10 @@ TEST_F(PrintPlanWithStatsTest, tableWriterWithTableScan) { {" ioWaitWallNanos [ ]* sum: .+, count: .+ min: .+, max: .+"}, {" maxSingleIoWaitWallNanos[ ]*sum: .+, count: 1, min: .+, max: .+"}, {" numPrefetch [ ]* sum: .+, count: .+, min: .+, max: .+"}, - {" numRamRead [ ]* sum: 7, count: 1, min: 7, max: 7"}, + {" numRamRead [ ]* sum: 7, count: 1, min: 7, max: 7, avg: 7"}, {" numStorageRead [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" numStripes[ ]* sum: .+, count: 1, min: .+, max: .+"}, - {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B"}, + {" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B, avg: 0B"}, {" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"}, {" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+", diff --git a/velox/exec/tests/utils/TestIndexStorageConnector.cpp b/velox/exec/tests/utils/TestIndexStorageConnector.cpp index cc6fad549753..3dfdfe04e3b2 100644 --- a/velox/exec/tests/utils/TestIndexStorageConnector.cpp +++ b/velox/exec/tests/utils/TestIndexStorageConnector.cpp @@ -208,14 +208,26 @@ void TestIndexSource::initOutputProjections() { void TestIndexSource::recordCpuTiming(const CpuWallTiming& timing) { VELOX_CHECK_EQ(timing.count, 1); std::lock_guard l(mutex_); - addOperatorRuntimeStats( - IndexLookupJoin::kConnectorLookupWallTime, - RuntimeCounter(timing.wallNanos, RuntimeCounter::Unit::kNanos), - runtimeStats_); - addOperatorRuntimeStats( - IndexLookupJoin::kConnectorLookupCpuTime, - RuntimeCounter(timing.cpuNanos, RuntimeCounter::Unit::kNanos), - runtimeStats_); + if (timing.wallNanos != 0) { + addOperatorRuntimeStats( + IndexLookupJoin::kConnectorLookupWallTime, + RuntimeCounter(timing.wallNanos, RuntimeCounter::Unit::kNanos), + runtimeStats_); + // 
This is just for testing purpose to check if the runtime stats has been + // set properly. + addOperatorRuntimeStats( + IndexLookupJoin::kClientLookupWaitWallTime, + RuntimeCounter(timing.wallNanos, RuntimeCounter::Unit::kNanos), + runtimeStats_); + } + if (timing.cpuNanos != 0) { + // This is just for testing purpose to check if the runtime stats has been + // set properly. + addOperatorRuntimeStats( + IndexLookupJoin::kConnectorResultPrepareTime, + RuntimeCounter(timing.cpuNanos, RuntimeCounter::Unit::kNanos), + runtimeStats_); + } } std::unordered_map TestIndexSource::runtimeStats() {