From dd2b03764bdf243b95f62112713494a7298f21da Mon Sep 17 00:00:00 2001
From: haosdent
Date: Tue, 28 Apr 2026 23:07:10 +0800
Subject: [PATCH] [CI] De-flake test_chat_completion_n_parameter_non_streaming
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The test sampled n=3 completions at temperature=0.7 with no seed and
asserted the three responses differ. On a low-entropy prompt with only
20 output tokens, all three samples can collapse to the same string,
which surfaced as an intermittent CI failure on an unrelated commit.

Fix the seed (42), raise temperature to 1.0, and widen the token budget
to 50 — mirroring the existing pattern in
tests/entrypoints/openai/completion/test_completion.py for the same n>1
diversity assertion. With distinct per-child RNG streams derived from
the seed, divergence is deterministic.

Signed-off-by: haosdent
---
 tests/entrypoints/openai/chat_completion/test_chat.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/entrypoints/openai/chat_completion/test_chat.py b/tests/entrypoints/openai/chat_completion/test_chat.py
index 212839f78d5c..80f54f6800ae 100644
--- a/tests/entrypoints/openai/chat_completion/test_chat.py
+++ b/tests/entrypoints/openai/chat_completion/test_chat.py
@@ -845,9 +845,10 @@ async def test_chat_completion_n_parameter_non_streaming(
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_completion_tokens=20,
-        temperature=0.7,
+        max_completion_tokens=50,
+        temperature=1.0,
         n=3,
+        seed=42,
         stream=False,
     )

@@ -859,7 +860,6 @@

         assert choice.message.content is not None
         assert len(choice.message.content) > 0

-    # Verify all responses are different (highly likely with temperature > 0)
     contents = [choice.message.content for choice in chat_completion.choices]
     assert len(set(contents)) > 1, "Expected different responses with n=3"