Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
aa34a73
Initial offloading metrics collection.
Nov 2, 2025
7365ae9
Timing telemetry are now per-token instead of per-block.
Nov 2, 2025
fb23282
Post-lint
omerpaz95 Nov 2, 2025
d6b44d7
Another pre-commit fix.
omerpaz95 Nov 2, 2025
20e93ac
Merge branch 'main' into offload-connector-metrics
omerpaz95 Nov 2, 2025
30bb045
Merge branch 'main' into offload-connector-metrics
omerpaz95 Nov 4, 2025
af9da55
Fixed PR review-added PromMetrics
omerpaz95 Nov 4, 2025
5cef9b8
Merge branch 'main' into offload-connector-metrics
omerpaz95 Nov 5, 2025
bd669ca
Merge branch 'main' into offload-connector-metrics
omerpaz95 Nov 6, 2025
42e26b6
Merge branch 'main' into offload-connector-metrics
omerpaz95 Nov 6, 2025
c511751
Fixed after code review by Or.
omerpaz95 Nov 18, 2025
be29129
Merge branch 'main' into offload-connector-metrics
omerpaz95 Nov 18, 2025
cb85778
Merge branch 'main' into offload-connector-metrics
omerpaz95 Nov 18, 2025
fc8f71b
Merge branch 'main' into offload-connector-metrics
omerpaz95 Dec 14, 2025
d3d19c1
Another pre-commit fix.
omerpaz95 Nov 2, 2025
20fddae
Small bugfixing
omerpaz95 Dec 21, 2025
a624c84
Merge branch 'main' into offload-connector-metrics
omerpaz95 Dec 21, 2025
22409e5
pre-commit fix.
omerpaz95 Dec 22, 2025
30f551d
Changed metric logging as per new required spec
omerpaz95 Dec 25, 2025
a72a283
Merge branch 'main' into offload-connector-metrics
omerpaz95 Dec 25, 2025
d994e8b
Fixed computation of request bytes.
omerpaz95 Dec 28, 2025
fd36f9f
Merge branch 'main' into offload-connector-metrics
omerpaz95 Dec 28, 2025
ea35979
Fix pre-commit check.
omerpaz95 Dec 29, 2025
e892970
Merge branch 'main' into offload-connector-metrics
omerpaz95 Dec 29, 2025
71c0154
Delete out.txt
omerpaz95 Dec 29, 2025
e4bb13a
Removed unneccessary changes.
omerpaz95 Dec 29, 2025
56313ca
Merge branch 'main' into offload-connector-metrics
omerpaz95 Dec 29, 2025
8f92427
Merge branch 'main' into offload-connector-metrics
omerpaz95 Dec 29, 2025
3402af1
removed unreferenced value.
omerpaz95 Dec 29, 2025
f074210
Merge branch 'main' into offload-connector-metrics
omerpaz95 Dec 30, 2025
c035d2f
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 12, 2026
1981e15
changed metric collection as per Or's request
omerpaz95 Jan 12, 2026
4a67a80
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 12, 2026
01f7b8d
Fixed after Or's review
omerpaz95 Jan 14, 2026
17738ad
Fixed after Mark's comments.
omerpaz95 Jan 14, 2026
4141d18
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 14, 2026
311a2bf
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 14, 2026
deb30c1
Another review fix.
omerpaz95 Jan 18, 2026
83fe59c
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 18, 2026
2b4564f
Fix after Or's review.
omerpaz95 Jan 21, 2026
92099d5
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 21, 2026
1677b51
Styling fixes
omerpaz95 Jan 21, 2026
53bf4da
Added type assert
omerpaz95 Jan 21, 2026
b7f4652
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 21, 2026
dfbb321
Minor fixes and tests conform with new datastructs and metrics
omerpaz95 Jan 22, 2026
7a421b5
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 22, 2026
e44d8c2
Fixed bugs.
omerpaz95 Jan 22, 2026
ba64beb
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 22, 2026
8846b5a
Merge branch 'main' into offload-connector-metrics
omerpaz95 Jan 22, 2026
f320fea
Merge branch 'main' into offload-connector-metrics
markmc Jan 26, 2026
bbc2cd1
Merge branch 'main' into offload-connector-metrics
markmc Jan 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 150 additions & 1 deletion tests/v1/kv_connector/unit/test_offloading_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from vllm.distributed.kv_transfer.kv_connector.v1.offloading_connector import (
OffloadingConnector,
OffloadingConnectorMetadata,
OffloadingConnectorStats,
)
from vllm.forward_context import ForwardContext
from vllm.utils.hashing import sha256
Expand Down Expand Up @@ -86,7 +87,14 @@ def complete_jobs(self, job_ids: set[int]) -> None:
if job_id in self.waiting_jobs:
self.waiting_jobs.remove(job_id)
self.completed_jobs.append(job_id)
self.completed_transfers.append((job_id, True))
result = TransferResult(
job_id=job_id,
success=True,
transfer_size=None,
transfer_time=None,
transfer_type=None,
)
self.completed_transfers.append(result)

def wait(self, job_ids: set[int]) -> None:
self.flushed_jobs |= job_ids
Expand Down Expand Up @@ -720,3 +728,144 @@ def test_concurrent_lookups_of_the_same_prefix(request_runner):

# second request will use the GPU prefix cache
assert transfer_jobs == list(runner.offloading_spec.handler.transfer_specs)


class TestOffloadingConnectorStats:
"""Tests for OffloadingConnector stats reconstruction and operations."""

def test_build_kv_connector_stats_with_none(self):
"""Test that build_kv_connector_stats returns empty stats when given None."""
stats = OffloadingConnector.build_kv_connector_stats(data=None)

assert stats is not None
assert isinstance(stats, OffloadingConnectorStats)
assert len(stats.data) == 0
assert stats.is_empty()

def test_build_kv_connector_stats_with_empty_dict(self):
"""Test that build_kv_connector_stats returns empty stats with empty dict."""
stats = OffloadingConnector.build_kv_connector_stats(data={})

assert stats is not None
assert isinstance(stats, OffloadingConnectorStats)
assert len(stats.data) == 0
assert stats.is_empty()

def test_build_kv_connector_stats_reconstructs_offload_stats(self):
"""Test that OffloadingConnector stats are properly reconstructed with
correct data."""
serialized_data = {
"CPU_to_GPU": [
{"op_size": 16, "op_time": 1.0},
{"op_size": 8, "op_time": 0.5},
],
"GPU_to_CPU": [
{"op_size": 1, "op_time": 0.1},
{"op_size": 2, "op_time": 0.2},
],
}

stats = OffloadingConnector.build_kv_connector_stats(data=serialized_data)

offload_connector_stats = stats
assert isinstance(offload_connector_stats, OffloadingConnectorStats)
assert offload_connector_stats.data["CPU_to_GPU"] == [
{"op_size": 16, "op_time": 1.0},
{"op_size": 8, "op_time": 0.5},
]
assert offload_connector_stats.data["GPU_to_CPU"] == [
{"op_size": 1, "op_time": 0.1},
{"op_size": 2, "op_time": 0.2},
]

def test_aggregate_same_connector(self):
"""Test aggregating stats from the same connector type."""
stats1 = OffloadingConnectorStats(
data={
"CPU_to_GPU": [
{"op_size": 16, "op_time": 1.0},
{"op_size": 8, "op_time": 0.5},
],
"GPU_to_CPU": [
{"op_size": 1, "op_time": 0.1},
{"op_size": 2, "op_time": 0.2},
],
}
)

stats2 = OffloadingConnectorStats(
data={
"CPU_to_GPU": [
{"op_size": 3, "op_time": 0.2},
{"op_size": 7, "op_time": 0.9},
],
"GPU_to_CPU": [{"op_size": 16, "op_time": 2}],
}
)

result = stats1.aggregate(stats2)

assert result is stats1 # Should return self
offload_connector_stats = result
assert offload_connector_stats.data["CPU_to_GPU"] == [
{"op_size": 16, "op_time": 1.0},
{"op_size": 8, "op_time": 0.5},
{"op_size": 3, "op_time": 0.2},
{"op_size": 7, "op_time": 0.9},
]
assert offload_connector_stats.data["GPU_to_CPU"] == [
{"op_size": 1, "op_time": 0.1},
{"op_size": 2, "op_time": 0.2},
{"op_size": 16, "op_time": 2},
]

def test_reduce(self):
"""Test that reduce() correctly reduces all nested connector stats."""
stats = OffloadingConnectorStats(
data={
"CPU_to_GPU": [
{"op_size": 16, "op_time": 1.0},
{"op_size": 8, "op_time": 0.5},
{"op_size": 3, "op_time": 0.2},
{"op_size": 7, "op_time": 0.9},
],
"GPU_to_CPU": [
{"op_size": 1, "op_time": 0.1},
{"op_size": 2, "op_time": 0.2},
{"op_size": 16, "op_time": 2},
],
}
)

reduced = stats.reduce()

assert isinstance(reduced, dict)
# Check that the stats were reduced (should have aggregated values)
assert "CPU_to_GPU_total_bytes" in reduced
assert "CPU_to_GPU_total_time" in reduced
assert "GPU_to_CPU_total_bytes" in reduced
assert "GPU_to_CPU_total_time" in reduced
assert reduced["CPU_to_GPU_total_bytes"] == 34
assert reduced["CPU_to_GPU_total_time"] == 2.6
assert reduced["GPU_to_CPU_total_time"] == 2.3
assert reduced["GPU_to_CPU_total_bytes"] == 19

def test_reset(self):
"""Test that reset() resets all nested connector stats."""
offload_connector_stats = OffloadingConnectorStats(
data={
"CPU_to_GPU": [
{"op_size": 3, "op_time": 0.2},
{"op_size": 7, "op_time": 0.9},
],
"GPU_to_CPU": [{"op_size": 16, "op_time": 2}],
}
)

assert not offload_connector_stats.is_empty()

offload_connector_stats.reset()

# After reset, stats should be empty
assert offload_connector_stats.is_empty()
assert len(offload_connector_stats.data) == 0
19 changes: 17 additions & 2 deletions tests/v1/kv_offload/test_cpu_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,30 @@ def test_transfer(
orig_dst_caches = [x.clone() for x in handler.dst_tensors]

# call transfer function
start_time = time.time()
assert handler.transfer_async(1, (src_spec, dst_spec))
assert set({x[0] for x in handler._transfers}) == {1}
assert set({x.job_id for x in handler._transfers}) == {1}

# wait for transfer to complete
end_time = time.time() + 10
while time.time() < end_time:
finished = handler.get_finished()
if finished:
assert finished == [(1, True)]
assert finished[0].job_id == 1
assert finished[0].success
assert (
finished[0].transfer_type == ("GPU", "CPU")
if gpu_to_cpu
else ("CPU", "GPU")
)
assert (
finished[0].transfer_size
== handler.total_block_size_in_bytes
* handler.dst_block_size_factor
* len(dst_blocks)
)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you also assert 0 < finished[0].time < (time.time() - start_time)?

assert finished[0].transfer_time > 0
assert finished[0].transfer_time < (time.time() - start_time)
break
time.sleep(0.1)

Expand Down
4 changes: 2 additions & 2 deletions vllm/distributed/kv_transfer/kv_connector/v1/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def __init__(
self._counter_cls = metric_types[Counter]
self._histogram_cls = metric_types[Histogram]
self._labelnames = labelnames
self._per_engine_labelvalues = per_engine_labelvalues
self.per_engine_labelvalues = per_engine_labelvalues
Comment thread
markmc marked this conversation as resolved.

def make_per_engine(self, metric: PromMetric) -> dict[int, PromMetric]:
"""
Expand All @@ -134,7 +134,7 @@ def make_per_engine(self, metric: PromMetric) -> dict[int, PromMetric]:
"""
return {
idx: metric.labels(*labelvalues)
for idx, labelvalues in self._per_engine_labelvalues.items()
for idx, labelvalues in self.per_engine_labelvalues.items()
}

def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
Expand Down
Loading