|  | 
|  | 1 | +import pickle | 
|  | 2 | +import sys | 
|  | 3 | +from unittest.mock import MagicMock | 
|  | 4 | + | 
|  | 5 | +import cloudpickle | 
|  | 6 | +import mpi4py | 
|  | 7 | +import pytest | 
|  | 8 | + | 
|  | 9 | +from tensorrt_llm import mpi_rank | 
|  | 10 | +from tensorrt_llm._torch.pyexecutor.connector import KvCacheConnectorManager | 
|  | 11 | +from tensorrt_llm._torch.pyexecutor.scheduler import ScheduledRequests | 
|  | 12 | + | 
|  | 13 | +cloudpickle.register_pickle_by_value(sys.modules[__name__]) | 
|  | 14 | +mpi4py.MPI.pickle.__init__( | 
|  | 15 | +    cloudpickle.dumps, | 
|  | 16 | +    cloudpickle.loads, | 
|  | 17 | +    pickle.HIGHEST_PROTOCOL, | 
|  | 18 | +) | 
|  | 19 | + | 
|  | 20 | + | 
|  | 21 | +def run_across_mpi(executor, fun, num_ranks): | 
|  | 22 | +    return list(executor.starmap(fun, [() for i in range(num_ranks)])) | 
|  | 23 | + | 
|  | 24 | + | 
|  | 25 | +@pytest.mark.parametrize("mpi_pool_executor", [2], indirect=True) | 
|  | 26 | +def test_connector_manager_get_finished_allgather(mpi_pool_executor): | 
|  | 27 | + | 
|  | 28 | +    def test(): | 
|  | 29 | +        worker = MagicMock() | 
|  | 30 | + | 
|  | 31 | +        if mpi_rank() == 0: | 
|  | 32 | +            scheduler = MagicMock() | 
|  | 33 | + | 
|  | 34 | +            scheduler.request_finished.return_value = True | 
|  | 35 | +        else: | 
|  | 36 | +            scheduler = None | 
|  | 37 | + | 
|  | 38 | +        manager = KvCacheConnectorManager(worker, scheduler=scheduler) | 
|  | 39 | + | 
|  | 40 | +        req = MagicMock() | 
|  | 41 | + | 
|  | 42 | +        req.request_id = 42 | 
|  | 43 | + | 
|  | 44 | +        manager.request_finished(req) | 
|  | 45 | + | 
|  | 46 | +        # To start, make both workers return nothing. | 
|  | 47 | +        worker.get_finished.return_value = ([], []) | 
|  | 48 | + | 
|  | 49 | +        assert manager.get_finished() == [] | 
|  | 50 | + | 
|  | 51 | +        assert worker.get_finished.call_count == 1 | 
|  | 52 | +        assert worker.get_finished.call_args[0] == ([42], []) | 
|  | 53 | + | 
|  | 54 | +        worker.get_finished.reset_mock() | 
|  | 55 | + | 
|  | 56 | +        # Now, only return the request id on one worker. | 
|  | 57 | +        if mpi_rank() == 0: | 
|  | 58 | +            worker.get_finished.return_value = ([42], []) | 
|  | 59 | +        else: | 
|  | 60 | +            worker.get_finished.return_value = ([], []) | 
|  | 61 | + | 
|  | 62 | +        # It should still return nothing, since rank 1 is still saving. | 
|  | 63 | +        assert manager.get_finished() == [] | 
|  | 64 | + | 
|  | 65 | +        assert worker.get_finished.call_count == 1 | 
|  | 66 | +        assert worker.get_finished.call_args[0] == ([], []) | 
|  | 67 | + | 
|  | 68 | +        # Now, also return it on worker 1. | 
|  | 69 | +        if mpi_rank() == 0: | 
|  | 70 | +            worker.get_finished.return_value = ([], []) | 
|  | 71 | +        else: | 
|  | 72 | +            worker.get_finished.return_value = ([42], []) | 
|  | 73 | + | 
|  | 74 | +        assert manager.get_finished() == [req] | 
|  | 75 | + | 
|  | 76 | +    run_across_mpi(mpi_pool_executor, test, 2) | 
|  | 77 | + | 
|  | 78 | + | 
|  | 79 | +@pytest.mark.parametrize("mpi_pool_executor", [2], indirect=True) | 
|  | 80 | +def test_connector_manager_num_matched_tokens(mpi_pool_executor): | 
|  | 81 | + | 
|  | 82 | +    def test(): | 
|  | 83 | +        worker = MagicMock() | 
|  | 84 | + | 
|  | 85 | +        if mpi_rank() == 0: | 
|  | 86 | +            scheduler = MagicMock() | 
|  | 87 | +            scheduler.get_num_new_matched_tokens.return_value = (16, True) | 
|  | 88 | +        else: | 
|  | 89 | +            scheduler = None | 
|  | 90 | + | 
|  | 91 | +        manager = KvCacheConnectorManager(worker, scheduler=scheduler) | 
|  | 92 | + | 
|  | 93 | +        req = MagicMock() | 
|  | 94 | + | 
|  | 95 | +        req.request_id = 42 | 
|  | 96 | + | 
|  | 97 | +        assert manager.get_num_new_matched_tokens(req, 32) == 16 | 
|  | 98 | +        assert req.is_kv_cache_connector_async_onboard | 
|  | 99 | + | 
|  | 100 | +        if mpi_rank() == 0: | 
|  | 101 | +            assert scheduler.get_num_new_matched_tokens.call_count == 1 | 
|  | 102 | +            assert scheduler.get_num_new_matched_tokens.call_args[0] == (req, | 
|  | 103 | +                                                                         32) | 
|  | 104 | + | 
|  | 105 | +    run_across_mpi(mpi_pool_executor, test, 2) | 
|  | 106 | + | 
|  | 107 | + | 
|  | 108 | +@pytest.mark.parametrize("mpi_pool_executor", [2], indirect=True) | 
|  | 109 | +def test_connector_manager_take_scheduled_requests(mpi_pool_executor): | 
|  | 110 | + | 
|  | 111 | +    def test(): | 
|  | 112 | +        worker = MagicMock() | 
|  | 113 | + | 
|  | 114 | +        if mpi_rank() == 0: | 
|  | 115 | +            scheduler = MagicMock() | 
|  | 116 | +        else: | 
|  | 117 | +            scheduler = None | 
|  | 118 | + | 
|  | 119 | +        manager = KvCacheConnectorManager(worker, scheduler=scheduler) | 
|  | 120 | + | 
|  | 121 | +        scheduled_requests = ScheduledRequests() | 
|  | 122 | + | 
|  | 123 | +        req0 = MagicMock() | 
|  | 124 | +        req0.request_id = 0 | 
|  | 125 | + | 
|  | 126 | +        req1 = MagicMock() | 
|  | 127 | +        req1.request_id = 1 | 
|  | 128 | + | 
|  | 129 | +        if mpi_rank() == 0: | 
|  | 130 | +            scheduler.get_num_new_matched_tokens.return_value = (16, True) | 
|  | 131 | + | 
|  | 132 | +        assert manager.get_num_new_matched_tokens(req0, 0) == 16 | 
|  | 133 | +        if mpi_rank() == 0: | 
|  | 134 | +            assert scheduler.get_num_new_matched_tokens.call_count == 1 | 
|  | 135 | +            assert scheduler.get_num_new_matched_tokens.call_args[0] == (req0, | 
|  | 136 | +                                                                         0) | 
|  | 137 | + | 
|  | 138 | +            scheduler.get_num_new_matched_tokens.reset_mock() | 
|  | 139 | +            scheduler.get_num_new_matched_tokens.return_value = (32, False) | 
|  | 140 | + | 
|  | 141 | +        assert manager.get_num_new_matched_tokens(req1, 0) == 32 | 
|  | 142 | +        if mpi_rank() == 0: | 
|  | 143 | +            assert scheduler.get_num_new_matched_tokens.call_count == 1 | 
|  | 144 | +            assert scheduler.get_num_new_matched_tokens.call_args[0] == (req1, | 
|  | 145 | +                                                                         0) | 
|  | 146 | + | 
|  | 147 | +        scheduled_requests.context_requests = [req0, req1] | 
|  | 148 | + | 
|  | 149 | +        manager.take_scheduled_requests_pending_load(scheduled_requests) | 
|  | 150 | + | 
|  | 151 | +        assert scheduled_requests.context_requests == [req1] | 
|  | 152 | + | 
|  | 153 | +    run_across_mpi(mpi_pool_executor, test, 2) | 
0 commit comments