vllm-project · DarkLight1337 · May 22, 2025 · May 20, 2025 · googs1025 · May 20, 2025
diff --git a/examples/online_serving/openai_chat_completion_structured_outputs.py b/examples/online_serving/openai_chat_completion_structured_outputs.py
@@ -12,6 +12,9 @@
 from openai import BadRequestError, OpenAI
 from pydantic import BaseModel
 
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+
 
 # Guided decoding by Choice (list of possible options)
 def guided_choice_completion(client: OpenAI, model: str):
@@ -134,8 +137,8 @@ def extra_backend_options_completion(client: OpenAI, model: str):
 
 def main():
     client: OpenAI = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="-",
+        base_url=openai_api_base,
+        api_key=openai_api_key,
     )
 
     model = client.models.list().data[0].id

diff --git a/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
@@ -7,11 +7,14 @@
 # to enforce the format of a tool call response, but it could be used for
 # any structured output within a subset of the response.
 
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+
 
 def main():
     client = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="-",
+        base_url=openai_api_base,
+        api_key=openai_api_key,
     )
 
     messages = [{

diff --git a/examples/online_serving/openai_completion_client.py b/examples/online_serving/openai_completion_client.py
@@ -1,13 +1,23 @@
 # SPDX-License-Identifier: Apache-2.0
 
+import argparse
+
 from openai import OpenAI
 
 # Modify OpenAI's API key and API base to use vLLM's API server.
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
 
-def main():
+def parse_args():  # Parse this example client's command-line flags.
+    parser = argparse.ArgumentParser(description="Client for vLLM API server")
+    parser.add_argument("--stream",  # boolean flag: absent -> False
+                        action="store_true",
+                        help="Enable streaming response")
+    return parser.parse_args()  # no argv passed, so argparse reads sys.argv
+
+
+def main(args):
     client = OpenAI(
         # defaults to os.environ.get("OPENAI_API_KEY")
         api_key=openai_api_key,
@@ -18,18 +28,17 @@ def main():
     model = models.data[0].id
 
     # Completion API
-    stream = False
     completion = client.completions.create(
         model=model,
         prompt="A robot may not injure a human being",
         echo=False,
         n=2,
-        stream=stream,
+        stream=args.stream,
         logprobs=3)
 
     print("-" * 50)
     print("Completion results:")
-    if stream:
+    if args.stream:
         for c in completion:
             print(c)
     else:
@@ -38,4 +47,5 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    args = parse_args()
+    main(args)