-
Notifications
You must be signed in to change notification settings - Fork 130
[FIX_FOR_VLLM_CUSTOM=d28d86e8a34bf2617be294c235d6e6ef3321917b] #1279
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
c4be0bf
[FIX_FOR_VLLM_CUSTOM=d28d86e8a34bf2617be294c235d6e6ef3321917b]
iboiko-habana e9120ab
fix offloading scenario
iboiko-habana 5606a33
fix offloading scenario
iboiko-habana 1a4e6d9
[FIX_FOR_VLLM_CUSTOM=d28d86e8a34bf2617be294c235d6e6ef3321917b] fix fo…
iboiko-habana 72b2013
[FIX_FOR_VLLM_CUSTOM=d28d86e8a34bf2617be294c235d6e6ef3321917b] fix fo…
iboiko-habana 067920a
Fix for #37902
iboiko-habana 32c8ce1
Merge branch 'main' into hourly2903
iboiko-habana 9b68b78
Merge branch 'main' into hourly2903
iboiko-habana 4c01a9e
Adding tblib and disablement of qwen35
iboiko-habana 1267239
Merge branch 'main' into hourly2903
iboiko-habana 4182e63
fix for offloading tests
iboiko-habana 504a153
Merge branch 'main' into hourly2903
iboiko-habana bca5d02
Merge branch 'main' into hourly2903
iboiko-habana 908b12b
Update qwen3_5.py - after rebase
iboiko-habana 3aaa9e4
Update ci_e2e_discoverable_tests.sh
iboiko-habana 89fc66c
Update ci_e2e_discoverable_tests.sh
iboiko-habana File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,3 +5,4 @@ numba>=0.58.0 | |
| numpy>=1.26.0 | ||
| transformers>= 4.56.0, <5 | ||
| kaldi-native-fbank >= 1.18.7 | ||
| tblib==3.1.0 | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # SPDX-FileCopyrightText: Copyright contributors to the vLLM project | ||
| from tests.unit_tests.kv_offload.offloading_connector.utils import ( | ||
| request_runner, ) | ||
|
|
||
| __all__ = ["request_runner"] | ||
242 changes: 242 additions & 0 deletions
242
tests/unit_tests/kv_offload/offloading_connector/test_metrics.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,242 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # SPDX-FileCopyrightText: Copyright contributors to the vLLM project | ||
|
|
||
| from vllm.distributed.kv_transfer.kv_connector.v1.offloading_connector import ( | ||
| OffloadingConnector, | ||
| OffloadingConnectorStats, | ||
| ) | ||
|
|
||
|
|
||
| def test_build_kv_connector_stats_with_none(): | ||
| """Test that build_kv_connector_stats returns empty stats when given None.""" | ||
| stats = OffloadingConnector.build_kv_connector_stats(data=None) | ||
|
|
||
| assert stats is not None | ||
| assert isinstance(stats, OffloadingConnectorStats) | ||
| assert len(stats.data) == 0 | ||
| assert stats.is_empty() | ||
|
|
||
|
|
||
| def test_build_kv_connector_stats_with_empty_dict(): | ||
| """Test that build_kv_connector_stats returns empty stats with empty dict.""" | ||
| stats = OffloadingConnector.build_kv_connector_stats(data={}) | ||
|
|
||
| assert stats is not None | ||
| assert isinstance(stats, OffloadingConnectorStats) | ||
| assert len(stats.data) == 0 | ||
| assert stats.is_empty() | ||
|
|
||
|
|
||
| def test_build_kv_connector_stats_reconstructs_offload_stats(): | ||
| """Test that OffloadingConnector stats are properly reconstructed with | ||
| correct data.""" | ||
| serialized_data = { | ||
| "CPU_to_GPU": [ | ||
| { | ||
| "op_size": 16, | ||
| "op_time": 1.0 | ||
| }, | ||
| { | ||
| "op_size": 8, | ||
| "op_time": 0.5 | ||
| }, | ||
| ], | ||
| "GPU_to_CPU": [ | ||
| { | ||
| "op_size": 1, | ||
| "op_time": 0.1 | ||
| }, | ||
| { | ||
| "op_size": 2, | ||
| "op_time": 0.2 | ||
| }, | ||
| ], | ||
| } | ||
|
|
||
| stats = OffloadingConnector.build_kv_connector_stats(data=serialized_data) | ||
|
|
||
| offload_connector_stats = stats | ||
| assert isinstance(offload_connector_stats, OffloadingConnectorStats) | ||
| assert offload_connector_stats.data["CPU_to_GPU"] == [ | ||
| { | ||
| "op_size": 16, | ||
| "op_time": 1.0 | ||
| }, | ||
| { | ||
| "op_size": 8, | ||
| "op_time": 0.5 | ||
| }, | ||
| ] | ||
| assert offload_connector_stats.data["GPU_to_CPU"] == [ | ||
| { | ||
| "op_size": 1, | ||
| "op_time": 0.1 | ||
| }, | ||
| { | ||
| "op_size": 2, | ||
| "op_time": 0.2 | ||
| }, | ||
| ] | ||
|
|
||
|
|
||
| def test_aggregate_same_connector(): | ||
| """Test aggregating stats from the same connector type.""" | ||
| stats1 = OffloadingConnectorStats( | ||
| data={ | ||
| "CPU_to_GPU": [ | ||
| { | ||
| "op_size": 16, | ||
| "op_time": 1.0 | ||
| }, | ||
| { | ||
| "op_size": 8, | ||
| "op_time": 0.5 | ||
| }, | ||
| ], | ||
| "GPU_to_CPU": [ | ||
| { | ||
| "op_size": 1, | ||
| "op_time": 0.1 | ||
| }, | ||
| { | ||
| "op_size": 2, | ||
| "op_time": 0.2 | ||
| }, | ||
| ], | ||
| }) | ||
|
|
||
| stats2 = OffloadingConnectorStats( | ||
| data={ | ||
| "CPU_to_GPU": [ | ||
| { | ||
| "op_size": 3, | ||
| "op_time": 0.2 | ||
| }, | ||
| { | ||
| "op_size": 7, | ||
| "op_time": 0.9 | ||
| }, | ||
| ], | ||
| "GPU_to_CPU": [{ | ||
| "op_size": 16, | ||
| "op_time": 2 | ||
| }], | ||
| }) | ||
|
|
||
| result = stats1.aggregate(stats2) | ||
|
|
||
| assert result is stats1 # Should return self | ||
| offload_connector_stats = result | ||
| assert offload_connector_stats.data["CPU_to_GPU"] == [ | ||
| { | ||
| "op_size": 16, | ||
| "op_time": 1.0 | ||
| }, | ||
| { | ||
| "op_size": 8, | ||
| "op_time": 0.5 | ||
| }, | ||
| { | ||
| "op_size": 3, | ||
| "op_time": 0.2 | ||
| }, | ||
| { | ||
| "op_size": 7, | ||
| "op_time": 0.9 | ||
| }, | ||
| ] | ||
| assert offload_connector_stats.data["GPU_to_CPU"] == [ | ||
| { | ||
| "op_size": 1, | ||
| "op_time": 0.1 | ||
| }, | ||
| { | ||
| "op_size": 2, | ||
| "op_time": 0.2 | ||
| }, | ||
| { | ||
| "op_size": 16, | ||
| "op_time": 2 | ||
| }, | ||
| ] | ||
|
|
||
|
|
||
| def test_reduce(): | ||
| """Test that reduce() correctly reduces all nested connector stats.""" | ||
| stats = OffloadingConnectorStats( | ||
| data={ | ||
| "CPU_to_GPU": [ | ||
| { | ||
| "op_size": 16, | ||
| "op_time": 1.0 | ||
| }, | ||
| { | ||
| "op_size": 8, | ||
| "op_time": 0.5 | ||
| }, | ||
| { | ||
| "op_size": 3, | ||
| "op_time": 0.2 | ||
| }, | ||
| { | ||
| "op_size": 7, | ||
| "op_time": 0.9 | ||
| }, | ||
| ], | ||
| "GPU_to_CPU": [ | ||
| { | ||
| "op_size": 1, | ||
| "op_time": 0.1 | ||
| }, | ||
| { | ||
| "op_size": 2, | ||
| "op_time": 0.2 | ||
| }, | ||
| { | ||
| "op_size": 16, | ||
| "op_time": 2 | ||
| }, | ||
| ], | ||
| }) | ||
|
|
||
| reduced = stats.reduce() | ||
|
|
||
| assert isinstance(reduced, dict) | ||
| # Check that the stats were reduced (should have aggregated values) | ||
| assert "CPU_to_GPU_total_bytes" in reduced | ||
| assert "CPU_to_GPU_total_time" in reduced | ||
| assert "GPU_to_CPU_total_bytes" in reduced | ||
| assert "GPU_to_CPU_total_time" in reduced | ||
| assert reduced["CPU_to_GPU_total_bytes"] == 34 | ||
| assert reduced["CPU_to_GPU_total_time"] == 2.6 | ||
| assert reduced["GPU_to_CPU_total_time"] == 2.3 | ||
| assert reduced["GPU_to_CPU_total_bytes"] == 19 | ||
|
|
||
|
|
||
| def test_reset(): | ||
| """Test that reset() resets all nested connector stats.""" | ||
| offload_connector_stats = OffloadingConnectorStats( | ||
| data={ | ||
| "CPU_to_GPU": [ | ||
| { | ||
| "op_size": 3, | ||
| "op_time": 0.2 | ||
| }, | ||
| { | ||
| "op_size": 7, | ||
| "op_time": 0.9 | ||
| }, | ||
| ], | ||
| "GPU_to_CPU": [{ | ||
| "op_size": 16, | ||
| "op_time": 2 | ||
| }], | ||
| }) | ||
|
|
||
| assert not offload_connector_stats.is_empty() | ||
|
|
||
| offload_connector_stats.reset() | ||
|
|
||
| # After reset, stats should be empty | ||
| assert offload_connector_stats.is_empty() | ||
| assert len(offload_connector_stats.data) == 0 |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These parenthesized imports include whitespace before the closing `)` (e.g. `request_runner, )`), which typically triggers pycodestyle/ruff E202. Please reformat the import (or run the formatter).