diff --git a/tests/entrypoints/openai/responses/test_harmony.py b/tests/entrypoints/openai/responses/test_harmony.py index e99a299c12ee..fe67147ebc04 100644 --- a/tests/entrypoints/openai/responses/test_harmony.py +++ b/tests/entrypoints/openai/responses/test_harmony.py @@ -1259,27 +1259,28 @@ async def test_system_prompt_override(client: OpenAI, model_name: str): assert response.status == "completed" assert response.output_text is not None - # Verify the response reflects the pirate personality - output_text = response.output_text.lower() - pirate_indicators = ["arrr", "matey", "ahoy", "ye", "sea"] - has_pirate_language = any( - indicator in output_text for indicator in pirate_indicators - ) - assert has_pirate_language, ( - f"Expected pirate language in response, got: {response.output_text}" - ) - - # Verify the reasoning mentions the custom system prompt + # Extract reasoning first (needed for relaxed persona check) reasoning_item = None for item in response.output: if item.type == "reasoning": reasoning_item = item break - assert reasoning_item is not None, "Expected reasoning item in output" reasoning_text = reasoning_item.content[0].text.lower() - assert "pirate" in reasoning_text, ( - f"Expected reasoning to mention pirate, got: {reasoning_text}" + + # Verify the custom system prompt was applied: either response uses pirate + # language, or reasoning mentions the pirate persona. Models may occasionally + # produce generic replies despite considering the persona. + output_text = response.output_text.lower() + pirate_indicators = ["arrr", "matey", "ahoy", "ye", "sea", "aye"] + has_pirate_language = any( + indicator in output_text for indicator in pirate_indicators + ) + reasoning_mentions_pirate = "pirate" in reasoning_text + assert has_pirate_language or reasoning_mentions_pirate, ( + f"Expected pirate language in response or 'pirate' in reasoning. " + f"Response: {response.output_text!r}. Reasoning excerpt: " + f"{reasoning_text[:200]!r}..." ) # Test 2: Verify system message is not duplicated in input_messages