Skip to content
Merged
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
137a895
Update README.md
zhixiongli2011 Sep 19, 2025
5ccb21c
Update README.md
zhixiongli2011 Sep 19, 2025
6291f02
Merge branch 'ai-dynamo:main' into main
zhixiongli2011 Sep 25, 2025
495ec19
Merge branch 'ai-dynamo:main' into main
zhixiongli2011 Sep 26, 2025
732f574
Update test_router_e2e_with_mockers.py with a new helper function
zhixiongli2011 Sep 27, 2025
a56f295
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
460b1c4
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
51786dc
Update tests/router/test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
163fe70
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
b9fe4b8
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
28ff962
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
45ca761
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
60bfa53
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
e90f558
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
29b184f
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
8e19e41
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
93502ed
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
a9ae437
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 27, 2025
d4f11ae
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 29, 2025
30b0de4
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 29, 2025
288c263
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 29, 2025
f703fd3
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 29, 2025
3f4d77b
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 30, 2025
e4682ef
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 30, 2025
abfda5a
Update test_router_e2e_with_mockers.py
zhixiongli2011 Sep 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 86 additions & 60 deletions tests/router/test_router_e2e_with_mockers.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,91 @@ async def send_single_request(session: aiohttp.ClientSession, request_id: int):
logger.info(f"All {num_requests} requests completed successfully")


async def send_request_to_specified_mocker_instance(
    mockers: MockerProcess,
    token_ids: Optional[list] = None,
    stop_conditions: Optional[dict] = None,
    sampling_options: Optional[dict] = None,
    output_options: Optional[dict] = None,
    router_config_override: Optional[dict] = None,
    worker_id: Optional[int] = None,  # If None, Router selects the best available worker
):
    """Send a generate request to a mocker instance, retrying with exponential backoff.

    Args:
        mockers: Running mocker process group; its namespace is used to resolve
            the "mocker"/"generate" endpoint.
        token_ids: Prompt token ids; defaults to a minimal dummy prompt [1, 2, 3].
        stop_conditions: Stop conditions; defaults to generating a single token.
        sampling_options: Sampling options; defaults to {"temperature": 0.7}.
        output_options: Output options; defaults to excluding input tokens and
            full text from the response.
        router_config_override: Router config overrides; defaults to
            {"overlap_score_weight": 1.0}.
        worker_id: Target worker instance id, or None to let the router pick
            the best available worker.

    Returns:
        True once a request completes successfully.

    Raises:
        ValueError: If worker_id is neither an int nor None.
        RuntimeError: If every retry attempt fails (the last underlying error
            is chained as the cause).
    """
    # Validate before doing any endpoint work so bad input fails fast.
    # bool is a subclass of int but is never a valid worker id.
    if worker_id is not None and (
        isinstance(worker_id, bool) or not isinstance(worker_id, int)
    ):
        raise ValueError(
            f"worker_id must be an int or None, got: {type(worker_id)}"
        )

    runtime = get_runtime()
    namespace = runtime.namespace(mockers.namespace)
    component = namespace.component("mocker")
    endpoint = component.endpoint("generate")

    kv_push_router = KvPushRouter(
        endpoint=endpoint,
        block_size=BLOCK_SIZE,
        kv_router_config=KvRouterConfig(),
    )

    # Fall back to minimal defaults for any field the caller did not override.
    test_token_ids = token_ids if token_ids is not None else [1, 2, 3]
    test_stop_conditions = (
        stop_conditions if stop_conditions is not None else {"max_tokens": 1}
    )
    test_sampling_options = (
        sampling_options if sampling_options is not None else {"temperature": 0.7}
    )
    test_output_options = (
        output_options
        if output_options is not None
        else {"include_input_tokens": False, "return_full_text": False}
    )
    test_router_config_override = (
        router_config_override
        if router_config_override is not None
        else {"overlap_score_weight": 1.0}
    )

    log_message = (
        f"the mocker with worker_id={worker_id}"
        if worker_id is not None
        else "the best available mocker"
    )

    max_retries = 8
    wait_time = 1.0  # seconds; doubled after each failed attempt

    for attempt in range(max_retries + 1):
        try:
            logger.info(
                f"Sending request to {log_message} (attempt {attempt + 1})"
            )

            stream = await kv_push_router.generate(
                token_ids=test_token_ids,
                model=MODEL_NAME,
                stop_conditions=test_stop_conditions,
                sampling_options=test_sampling_options,
                output_options=test_output_options,
                router_config_override=test_router_config_override,
                worker_id=worker_id,
            )

            # Drain the stream; completing it end-to-end is the success signal.
            async for _ in stream:
                pass

            logger.info(f"Request succeeded on attempt {attempt + 1}")
            return True

        except Exception as e:
            logger.warning(f"Attempt {attempt + 1} failed with error: {e}")
            if attempt < max_retries:
                await asyncio.sleep(wait_time)
                wait_time *= 2  # exponential backoff
            else:
                # Chain the last error so the root cause stays in the traceback.
                raise RuntimeError(
                    f"Failed to connect to mockers after {max_retries + 1} attempts: {e}"
                ) from e

@pytest.mark.pre_merge
@pytest.mark.model(MODEL_NAME)
def test_mocker_kv_router(request, runtime_services, predownload_tokenizers):
Expand Down Expand Up @@ -584,68 +669,9 @@ def test_kv_push_router_bindings(request, runtime_services, predownload_tokenize
logger.info(f"All mockers using endpoint: {mockers.endpoint}")
mockers.__enter__()

# Wait for mockers to be ready by sending a dummy request with retry
async def wait_for_mockers_ready() -> bool:
    """Poll the mocker 'generate' endpoint with a dummy request until it responds.

    Closes over `mockers` from the enclosing test. Retries up to `max_retries`
    times with exponential backoff; returns True on the first successful
    response and raises RuntimeError once all attempts are exhausted.
    """
    runtime = get_runtime()
    # Use the namespace from the mockers
    namespace = runtime.namespace(mockers.namespace)
    component = namespace.component("mocker")
    endpoint = component.endpoint("generate")

    kv_router_config = KvRouterConfig()
    kv_push_router = KvPushRouter(
        endpoint=endpoint,
        block_size=BLOCK_SIZE,
        kv_router_config=kv_router_config,
    )

    # Dummy request with minimal tokens
    dummy_token_ids = [1, 2, 3]  # Just a few tokens for testing
    max_retries = 8  # total attempts = max_retries + 1
    wait_time = 1  # seconds; doubled after each failed attempt

    for attempt in range(max_retries + 1):
        try:
            logger.info(
                f"Sending dummy request to check mocker readiness (attempt {attempt + 1})"
            )
            stream = await kv_push_router.generate(
                token_ids=dummy_token_ids,
                model=MODEL_NAME,
                stop_conditions={"max_tokens": 1},  # Generate just 1 token
                sampling_options={"temperature": 0.7},
                output_options={
                    "include_input_tokens": False,
                    "return_full_text": False,
                },
            )

            # Consume the stream to verify it works; counting tokens proves
            # the responses carry real payloads, not just an open stream.
            token_count = 0
            async for response in stream:
                if isinstance(response, dict) and "token_ids" in response:
                    token_count += len(response["token_ids"])

            logger.info(
                f"Mockers are ready! Dummy request succeeded on attempt {attempt + 1}"
            )
            return True

        except Exception as e:
            logger.warning(f"Attempt {attempt + 1} failed with error: {e}")
            if attempt < max_retries:
                await asyncio.sleep(wait_time)
                wait_time *= 2  # Exponential backoff
            else:
                raise RuntimeError(
                    f"Failed to connect to mockers after {max_retries + 1} attempts"
                )

    # NOTE(review): unreachable — every loop iteration either returns True
    # or raises; kept byte-identical to the original.
    return False

# Wait for mockers to be ready
asyncio.run(wait_for_mockers_ready())
asyncio.run(send_request_to_specified_mocker_instance(mockers=mockers))

# Run the async test
async def test_kv_push_router():
Expand Down
Loading