-
Couldn't load subscription status.
- Fork 659
fix: Add support for single element arrays for chat and completions prompts #3482
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
zhongdaor-nv
merged 14 commits into
main
from
zhongdaor/dis-717-open-ai-chat-completion-prompt-is-not-respected-if-we-send
Oct 14, 2025
Merged
Changes from 10 commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
5d06d3a
fix: A WAR (workaround) to make the completion endpoint accept a StringArray as input
zhongdaor-nv fc96565
Merge branch 'main' into zhongdaor/dis-717-open-ai-chat-completion-pr…
zhongdaor-nv 629f87f
fix
zhongdaor-nv 1216dfd
Merge branch 'main' into zhongdaor/dis-717-open-ai-chat-completion-pr…
zhongdaor-nv 7037085
support single-element StringArray input
zhongdaor-nv 090fc73
add tests
zhongdaor-nv af74290
Merge branch 'main' into zhongdaor/dis-717-open-ai-chat-completion-pr…
zhongdaor-nv ce14f6e
pre-commit
zhongdaor-nv 6264c58
fix
zhongdaor-nv 6652223
pre-commit
zhongdaor-nv 68211e2
using mock engine to test
zhongdaor-nv 9205ecc
pre-commit
zhongdaor-nv 8e45478
rename test file
zhongdaor-nv 1adb7f0
Merge branch 'main' into zhongdaor/dis-717-open-ai-chat-completion-pr…
zhongdaor-nv File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,216 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| """End-to-end tests covering reasoning effort behaviour.""" | ||
|
|
||
zhongdaor-nv marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
| import os | ||
| import shutil | ||
| from typing import Any, Dict | ||
|
|
||
| import pytest | ||
| import requests | ||
|
|
||
| from tests.conftest import EtcdServer, NatsServer | ||
| from tests.utils.constants import GPT_OSS | ||
| from tests.utils.managed_process import ManagedProcess | ||
| from tests.utils.payloads import check_models_api | ||
|
|
||
# Module-level logger for all process managers and tests in this file.
logger = logging.getLogger(__name__)

# Model under test. Reasoning-effort behaviour is exercised against GPT-OSS;
# the alias exists so the model can be swapped in one place.
REASONING_TEST_MODEL = GPT_OSS
class DynamoFrontendProcess(ManagedProcess):
    """Process manager for Dynamo frontend"""

    def __init__(self, request):
        # Launch the frontend with round-robin routing across workers.
        command = ["python", "-m", "dynamo.frontend", "--router-mode", "round-robin"]

        # Per-test log directory, derived from the pytest node name.
        log_dir = f"{request.node.name}_frontend"

        # Remove stale logs from a previous run; a missing directory is fine.
        try:
            shutil.rmtree(log_dir)
        except FileNotFoundError:
            pass
        else:
            logger.info(f"Cleaned up existing log directory: {log_dir}")

        super().__init__(
            command=command,
            display_output=True,
            terminate_existing=True,
            log_dir=log_dir,
        )
class GPTOSSWorkerProcess(ManagedProcess):
    """Worker process for GPT-OSS model."""

    def __init__(self, request, worker_id: str = "reasoning-worker"):
        self.worker_id = worker_id

        # vLLM backend worker with harmony tool-call and gpt_oss reasoning parsers.
        command = [
            "python3",
            "-m",
            "dynamo.vllm",
            "--model",
            REASONING_TEST_MODEL,
            "--enforce-eager",
            "--dyn-tool-call-parser",
            "harmony",
            "--dyn-reasoning-parser",
            "gpt_oss",
        ]

        # Inherit the caller's environment and enable the system health endpoint.
        env = dict(os.environ)
        env.update(
            DYN_LOG="debug",
            DYN_SYSTEM_ENABLED="true",
            DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS='["generate"]',
            DYN_SYSTEM_PORT="8083",
        )

        # Per-test, per-worker log directory; clear leftovers from prior runs.
        log_dir = f"{request.node.name}_{worker_id}"
        try:
            shutil.rmtree(log_dir)
        except FileNotFoundError:
            pass

        super().__init__(
            command=command,
            env=env,
            health_check_urls=[
                ("http://localhost:8000/v1/models", check_models_api),
                ("http://localhost:8083/health", self.is_ready),
            ],
            timeout=300,
            display_output=True,
            terminate_existing=False,
            stragglers=["VLLM::EngineCore"],
            straggler_commands=["-m dynamo.vllm"],
            log_dir=log_dir,
        )

    def is_ready(self, response) -> bool:
        """Return True when the worker's health endpoint reports status 'ready'."""
        try:
            body = response.json() or {}
        except ValueError:
            # Health endpoint returned a non-JSON body; treat as not ready.
            logger.warning("%s health response is not valid JSON", self.worker_id)
            return False

        status = body.get("status")
        if status == "ready":
            logger.info("%s status is ready", self.worker_id)
            return True
        logger.warning("%s status is not ready: %s", self.worker_id, status)
        return False
def _send_completion_request(
    payload: Dict[str, Any],
    timeout: int = 180,
) -> requests.Response:
    """Send a text completion request"""
    return requests.post(
        "http://localhost:8000/v1/completions",
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=timeout,
    )
@pytest.fixture(scope="module")
def runtime_services(request):
    """Module-scoped runtime services for this test file."""
    # NATS first, then etcd; both are torn down in reverse order on exit.
    with NatsServer(request) as nats_process, EtcdServer(request) as etcd_process:
        yield nats_process, etcd_process
@pytest.fixture(scope="module")
def start_services(request, runtime_services, predownload_models):
    """Start frontend and worker processes once for this module's tests.

    The frontend must be running before the worker registers with it, so the
    context managers are deliberately nested: frontend outermost, worker inner.
    Teardown happens in reverse order when the module's tests finish.
    """
    with DynamoFrontendProcess(request):
        logger.info("Frontend started for tests")
        with GPTOSSWorkerProcess(request):
            logger.info("Worker started for tests")
            yield
@pytest.mark.usefixtures("start_services")
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(REASONING_TEST_MODEL)
def test_completion_single_element_array_prompt() -> None:
    """A single-element string-array prompt is accepted by /v1/completions.

    Regression test: the completions endpoint must treat ``["..."]`` the same
    as a plain string prompt and return HTTP 200.
    """
    reasoning_effort = "low"

    payload: Dict[str, Any] = {
        "model": REASONING_TEST_MODEL,
        # Single-element array form of the prompt — the case under test.
        "prompt": ["Tell me about Mars"],
        "max_tokens": 2000,
        "chat_template_args": {"reasoning_effort": reasoning_effort},
    }

    response = _send_completion_request(payload)

    assert response.status_code == 200, (
        f"Completion request ({reasoning_effort}) failed with status "
        f"{response.status_code}: {response.text}"
    )
@pytest.mark.usefixtures("start_services")
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(REASONING_TEST_MODEL)
def test_completion_multi_string_prompt() -> None:
    """A plain string prompt is accepted by /v1/completions (baseline case).

    NOTE(review): the name says "multi_string" but the payload sends a single
    plain string — confirm whether the name or the payload is the intent.
    """
    reasoning_effort = "low"

    payload: Dict[str, Any] = {
        "model": REASONING_TEST_MODEL,
        # Plain string prompt — baseline behaviour the array form must match.
        "prompt": "Tell me about Mars",
        "max_tokens": 2000,
        "chat_template_args": {"reasoning_effort": reasoning_effort},
    }

    response = _send_completion_request(payload)

    assert response.status_code == 200, (
        f"Completion request ({reasoning_effort}) failed with status "
        f"{response.status_code}: {response.text}"
    )
@pytest.mark.usefixtures("start_services")
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(REASONING_TEST_MODEL)
def test_completion_multi_element_array_prompt() -> None:
    """A multi-element array prompt is rejected by /v1/completions.

    Only single prompts are supported; sending two prompts in one request is
    expected to fail with HTTP 500.
    """
    reasoning_effort = "low"

    payload: Dict[str, Any] = {
        "model": REASONING_TEST_MODEL,
        # Two prompts in one request — unsupported, must be rejected.
        "prompt": ["Tell me about Mars", "Tell me about Ceres"],
        "max_tokens": 2000,
        "chat_template_args": {"reasoning_effort": reasoning_effort},
    }

    response = _send_completion_request(payload)

    # request should fail because we are sending multiple prompts
    assert (
        response.status_code == 500
    ), f"Request should fail with code 500; response:{response.text}"
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.