Gate the Harmony adapter on an opt-out environment variable and on the
number of postprocess workers.

Before this change, use_harmony was True whenever
self.model_config.model_type == "gpt_oss". After it:
  * if the DISABLE_HARMONY_ADAPTER environment variable is exactly "1"
    (it defaults to "0" when unset), use_harmony is forced to False;
  * otherwise use_harmony is True only when model_type == "gpt_oss" AND
    self.llm.args.num_postprocess_workers == 0.

NOTE(review): the added code calls os.getenv; this hunk does not show the
file's imports, so confirm that `os` is imported in openai_server.py.
NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of the patch; the two blank context lines are
inferred from the hunk counts (-103,7 +103,12). Confirm against the
original patch before applying.

diff --git a/tensorrt_llm/serve/openai_server.py b/tensorrt_llm/serve/openai_server.py
index 6fa7ee1952b..b86ca93c048 100644
--- a/tensorrt_llm/serve/openai_server.py
+++ b/tensorrt_llm/serve/openai_server.py
@@ -103,7 +103,12 @@ def __init__(self,
 
         # gpt-oss
         self.harmony_adapter: HarmonyAdapter | None = None
-        self.use_harmony = self.model_config.model_type == "gpt_oss"
+        disable_harmony = os.getenv("DISABLE_HARMONY_ADAPTER", "0") == "1"
+        if disable_harmony:
+            self.use_harmony = False
+        else:
+            self.use_harmony = (self.model_config.model_type == "gpt_oss"
+                                and self.llm.args.num_postprocess_workers == 0)
 
         @asynccontextmanager
         async def lifespan(app: FastAPI):