sgl-project · Ying1123 · Jul 17, 2024 · Jul 17, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,9 @@
+repos:
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+  - repo: https://github.com/psf/black
+    rev: 24.4.2  # pinned release tag: pre-commit expects an immutable rev ("stable" is a moving ref); bump via `pre-commit autoupdate`
+    hooks:
+      - id: black
diff --git a/benchmark/latency_throughput/bench_serving.py b/benchmark/latency_throughput/bench_serving.py
@@ -312,8 +312,8 @@ def main(args: argparse.Namespace):
         np.sum([output_len for _, output_len, _ in REQUEST_LATENCY]) / benchmark_time
     )
 
-    #latencies = [round(latency, 2) for _, _, latency in REQUEST_LATENCY]
-    #print(latencies)
+    # latencies = [round(latency, 2) for _, _, latency in REQUEST_LATENCY]
+    # print(latencies)
 
     print(f"Total time: {benchmark_time:.2f} s")
     print(f"Request throughput: {args.num_prompts / benchmark_time:.2f} requests/s")

diff --git a/benchmark/line_retrieval/gen_data.py b/benchmark/line_retrieval/gen_data.py
@@ -48,9 +48,9 @@ def generate_lines(random_words, num_lines, redirect_ratio):
         )
         for i in redirect_indices:
             target_idx = np.random.choice(min(i * 2 + 100, num_lines))
-            lines[
-                i
-            ] = f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}."
+            lines[i] = (
+                f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}."
+            )
             redirects[i] = target_idx
 
     # Build links and find sources

diff --git a/examples/quick_start/anthropic_example_chat.py b/examples/quick_start/anthropic_example_chat.py
@@ -3,6 +3,7 @@
 export ANTHROPIC_API_KEY=sk-******
 python3 anthropic_example_chat.py
 """
+
 import sglang as sgl
 
 
@@ -30,7 +31,7 @@ def stream():
     state = multi_turn_question.run(
         question_1="What is the capital of the United States?",
         question_2="List two local attractions.",
-        stream=True
+        stream=True,
     )
 
     for out in state.text_iter():
@@ -39,13 +40,18 @@ def stream():
 
 
 def batch():
-    states = multi_turn_question.run_batch([
-        {"question_1": "What is the capital of the United States?",
-         "question_2": "List two local attractions."},
-
-        {"question_1": "What is the capital of France?",
-         "question_2": "What is the population of this city?"},
-    ])
+    states = multi_turn_question.run_batch(
+        [
+            {
+                "question_1": "What is the capital of the United States?",
+                "question_2": "List two local attractions.",
+            },
+            {
+                "question_1": "What is the capital of France?",
+                "question_2": "What is the population of this city?",
+            },
+        ]
+    )
 
     for s in states:
         print(s.messages())

diff --git a/examples/quick_start/anthropic_example_complete.py b/examples/quick_start/anthropic_example_complete.py
@@ -9,15 +9,14 @@
 
 @sgl.function
 def few_shot_qa(s, question):
-    s += (
-"""
+    s += """
 \n\nHuman: What is the capital of France?
 \n\nAssistant: Paris
 \n\nHuman: What is the capital of Germany?
 \n\nAssistant: Berlin
 \n\nHuman: What is the capital of Italy?
 \n\nAssistant: Rome
-""")
+"""
     s += "\n\nHuman: " + question + "\n"
     s += "\n\nAssistant:" + sgl.gen("answer", temperature=0)
 
@@ -33,19 +32,21 @@ def single():
 
 def stream():
     state = few_shot_qa.run(
-        question="What is the capital of the United States?",
-        stream=True)
+        question="What is the capital of the United States?", stream=True
+    )
 
     for out in state.text_iter("answer"):
         print(out, end="", flush=True)
     print()
 
 
 def batch():
-    states = few_shot_qa.run_batch([
-        {"question": "What is the capital of the United States?"},
-        {"question": "What is the capital of China?"},
-    ])
+    states = few_shot_qa.run_batch(
+        [
+            {"question": "What is the capital of the United States?"},
+            {"question": "What is the capital of China?"},
+        ]
+    )
 
     for s in states:
         print(s["answer"])

diff --git a/examples/quick_start/azure_openai_example_chat.py b/examples/quick_start/azure_openai_example_chat.py
@@ -3,9 +3,11 @@
 export AZURE_OPENAI_API_KEY=sk-******
-python3 openai_example_chat.py
+python3 azure_openai_example_chat.py
 """
-import sglang as sgl
+
 import os
 
+import sglang as sgl
+
 
 @sgl.function
 def multi_turn_question(s, question_1, question_2):
@@ -32,7 +34,7 @@ def stream():
     state = multi_turn_question.run(
         question_1="What is the capital of the United States?",
         question_2="List two local attractions.",
-        stream=True
+        stream=True,
     )
 
     for out in state.text_iter():
@@ -41,13 +43,18 @@ def stream():
 
 
 def batch():
-    states = multi_turn_question.run_batch([
-        {"question_1": "What is the capital of the United States?",
-         "question_2": "List two local attractions."},
-
-        {"question_1": "What is the capital of France?",
-         "question_2": "What is the population of this city?"},
-    ])
+    states = multi_turn_question.run_batch(
+        [
+            {
+                "question_1": "What is the capital of the United States?",
+                "question_2": "List two local attractions.",
+            },
+            {
+                "question_1": "What is the capital of France?",
+                "question_2": "What is the population of this city?",
+            },
+        ]
+    )
 
     for s in states:
         print(s.messages())

diff --git a/examples/quick_start/gemini_example_chat.py b/examples/quick_start/gemini_example_chat.py
@@ -3,6 +3,7 @@
 export GCP_PROJECT_ID=******
 python3 gemini_example_chat.py
 """
+
 import sglang as sgl
 
 
@@ -30,7 +31,7 @@ def stream():
     state = multi_turn_question.run(
         question_1="What is the capital of the United States?",
         question_2="List two local attractions.",
-        stream=True
+        stream=True,
     )
 
     for out in state.text_iter():
@@ -39,13 +40,18 @@ def stream():
 
 
 def batch():
-    states = multi_turn_question.run_batch([
-        {"question_1": "What is the capital of the United States?",
-         "question_2": "List two local attractions."},
-
-        {"question_1": "What is the capital of France?",
-         "question_2": "What is the population of this city?"},
-    ])
+    states = multi_turn_question.run_batch(
+        [
+            {
+                "question_1": "What is the capital of the United States?",
+                "question_2": "List two local attractions.",
+            },
+            {
+                "question_1": "What is the capital of France?",
+                "question_2": "What is the population of this city?",
+            },
+        ]
+    )
 
     for s in states:
         print(s.messages())

diff --git a/examples/quick_start/gemini_example_complete.py b/examples/quick_start/gemini_example_complete.py
@@ -9,15 +9,14 @@
 
 @sgl.function
 def few_shot_qa(s, question):
-    s += (
-"""The following are questions with answers.
+    s += """The following are questions with answers.
 Q: What is the capital of France?
 A: Paris
 Q: What is the capital of Germany?
 A: Berlin
 Q: What is the capital of Italy?
 A: Rome
-""")
+"""
     s += "Q: " + question + "\n"
     s += "A:" + sgl.gen("answer", stop="\n", temperature=0)
 
@@ -33,19 +32,21 @@ def single():
 
 def stream():
     state = few_shot_qa.run(
-        question="What is the capital of the United States?",
-        stream=True)
+        question="What is the capital of the United States?", stream=True
+    )
 
     for out in state.text_iter("answer"):
         print(out, end="", flush=True)
     print()
 
 
 def batch():
-    states = few_shot_qa.run_batch([
-        {"question": "What is the capital of the United States?"},
-        {"question": "What is the capital of China?"},
-    ])
+    states = few_shot_qa.run_batch(
+        [
+            {"question": "What is the capital of the United States?"},
+            {"question": "What is the capital of China?"},
+        ]
+    )
 
     for s in states:
         print(s["answer"])

diff --git a/examples/quick_start/gemini_example_multimodal_chat.py b/examples/quick_start/gemini_example_multimodal_chat.py
@@ -3,6 +3,7 @@
 export GCP_PROJECT_ID=******
 python3 gemini_example_multimodal_chat.py
 """
+
 import sglang as sgl
 
 
@@ -19,7 +20,7 @@ def image_qa(s, image_file1, image_file2, question):
         image_file1="./images/cat.jpeg",
         image_file2="./images/dog.jpeg",
         question="Describe difference of the two images in one sentence.",
-        stream=True
+        stream=True,
     )
 
     for out in state.text_iter("answer"):

diff --git a/examples/quick_start/openai_example_chat.py b/examples/quick_start/openai_example_chat.py
@@ -3,6 +3,7 @@
 export OPENAI_API_KEY=sk-******
 python3 openai_example_chat.py
 """
+
 import sglang as sgl
 
 
@@ -31,7 +32,7 @@ def stream():
     state = multi_turn_question.run(
         question_1="What is the capital of the United States?",
         question_2="List two local attractions.",
-        stream=True
+        stream=True,
     )
 
     for out in state.text_iter():
@@ -40,13 +41,18 @@ def stream():
 
 
 def batch():
-    states = multi_turn_question.run_batch([
-        {"question_1": "What is the capital of the United States?",
-         "question_2": "List two local attractions."},
-
-        {"question_1": "What is the capital of France?",
-         "question_2": "What is the population of this city?"},
-    ])
+    states = multi_turn_question.run_batch(
+        [
+            {
+                "question_1": "What is the capital of the United States?",
+                "question_2": "List two local attractions.",
+            },
+            {
+                "question_1": "What is the capital of France?",
+                "question_2": "What is the population of this city?",
+            },
+        ]
+    )
 
     for s in states:
         print(s.messages())

diff --git a/examples/quick_start/openai_example_complete.py b/examples/quick_start/openai_example_complete.py
@@ -9,15 +9,14 @@
 
 @sgl.function
 def few_shot_qa(s, question):
-    s += (
-"""The following are questions with answers.
+    s += """The following are questions with answers.
 Q: What is the capital of France?
 A: Paris
 Q: What is the capital of Germany?
 A: Berlin
 Q: What is the capital of Italy?
 A: Rome
-""")
+"""
     s += "Q: " + question + "\n"
     s += "A:" + sgl.gen("answer", stop="\n", temperature=0)
 
@@ -33,19 +32,21 @@ def single():
 
 def stream():
     state = few_shot_qa.run(
-        question="What is the capital of the United States?",
-        stream=True)
+        question="What is the capital of the United States?", stream=True
+    )
 
     for out in state.text_iter("answer"):
         print(out, end="", flush=True)
     print()
 
 
 def batch():
-    states = few_shot_qa.run_batch([
-        {"question": "What is the capital of the United States?"},
-        {"question": "What is the capital of China?"},
-    ])
+    states = few_shot_qa.run_batch(
+        [
+            {"question": "What is the capital of the United States?"},
+            {"question": "What is the capital of China?"},
+        ]
+    )
 
     for s in states:
         print(s["answer"])

diff --git a/examples/quick_start/openrouter_example_chat.py b/examples/quick_start/openrouter_example_chat.py
@@ -3,9 +3,11 @@
 export OPENROUTER_API_KEY=sk-******
-python3 together_example_chat.py
+python3 openrouter_example_chat.py
 """
-import sglang as sgl
+
 import os
 
+import sglang as sgl
+
 
 @sgl.function
 def multi_turn_question(s, question_1, question_2):