Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
repos:
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/psf/black
rev: stable
hooks:
- id: black
4 changes: 2 additions & 2 deletions benchmark/latency_throughput/bench_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ def main(args: argparse.Namespace):
np.sum([output_len for _, output_len, _ in REQUEST_LATENCY]) / benchmark_time
)

#latencies = [round(latency, 2) for _, _, latency in REQUEST_LATENCY]
#print(latencies)
# latencies = [round(latency, 2) for _, _, latency in REQUEST_LATENCY]
# print(latencies)

print(f"Total time: {benchmark_time:.2f} s")
print(f"Request throughput: {args.num_prompts / benchmark_time:.2f} requests/s")
Expand Down
6 changes: 3 additions & 3 deletions benchmark/line_retrieval/gen_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ def generate_lines(random_words, num_lines, redirect_ratio):
)
for i in redirect_indices:
target_idx = np.random.choice(min(i * 2 + 100, num_lines))
lines[
i
] = f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}."
lines[i] = (
f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}."
)
redirects[i] = target_idx

# Build links and find sources
Expand Down
22 changes: 14 additions & 8 deletions examples/quick_start/anthropic_example_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export ANTHROPIC_API_KEY=sk-******
python3 anthropic_example_chat.py
"""

import sglang as sgl


Expand Down Expand Up @@ -30,7 +31,7 @@ def stream():
state = multi_turn_question.run(
question_1="What is the capital of the United States?",
question_2="List two local attractions.",
stream=True
stream=True,
)

for out in state.text_iter():
Expand All @@ -39,13 +40,18 @@ def stream():


def batch():
states = multi_turn_question.run_batch([
{"question_1": "What is the capital of the United States?",
"question_2": "List two local attractions."},

{"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?"},
])
states = multi_turn_question.run_batch(
[
{
"question_1": "What is the capital of the United States?",
"question_2": "List two local attractions.",
},
{
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)

for s in states:
print(s.messages())
Expand Down
19 changes: 10 additions & 9 deletions examples/quick_start/anthropic_example_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@

@sgl.function
def few_shot_qa(s, question):
s += (
"""
s += """
\n\nHuman: What is the capital of France?
\n\nAssistant: Paris
\n\nHuman: What is the capital of Germany?
\n\nAssistant: Berlin
\n\nHuman: What is the capital of Italy?
\n\nAssistant: Rome
""")
"""
s += "\n\nHuman: " + question + "\n"
s += "\n\nAssistant:" + sgl.gen("answer", temperature=0)

Expand All @@ -33,19 +32,21 @@ def single():

def stream():
state = few_shot_qa.run(
question="What is the capital of the United States?",
stream=True)
question="What is the capital of the United States?", stream=True
)

for out in state.text_iter("answer"):
print(out, end="", flush=True)
print()


def batch():
states = few_shot_qa.run_batch([
{"question": "What is the capital of the United States?"},
{"question": "What is the capital of China?"},
])
states = few_shot_qa.run_batch(
[
{"question": "What is the capital of the United States?"},
{"question": "What is the capital of China?"},
]
)

for s in states:
print(s["answer"])
Expand Down
25 changes: 16 additions & 9 deletions examples/quick_start/azure_openai_example_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
export AZURE_OPENAI_API_KEY=sk-******
python3 azure_openai_example_chat.py
"""
import sglang as sgl

import os

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
Expand All @@ -32,7 +34,7 @@ def stream():
state = multi_turn_question.run(
question_1="What is the capital of the United States?",
question_2="List two local attractions.",
stream=True
stream=True,
)

for out in state.text_iter():
Expand All @@ -41,13 +43,18 @@ def stream():


def batch():
states = multi_turn_question.run_batch([
{"question_1": "What is the capital of the United States?",
"question_2": "List two local attractions."},

{"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?"},
])
states = multi_turn_question.run_batch(
[
{
"question_1": "What is the capital of the United States?",
"question_2": "List two local attractions.",
},
{
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)

for s in states:
print(s.messages())
Expand Down
22 changes: 14 additions & 8 deletions examples/quick_start/gemini_example_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export GCP_PROJECT_ID=******
python3 gemini_example_chat.py
"""

import sglang as sgl


Expand Down Expand Up @@ -30,7 +31,7 @@ def stream():
state = multi_turn_question.run(
question_1="What is the capital of the United States?",
question_2="List two local attractions.",
stream=True
stream=True,
)

for out in state.text_iter():
Expand All @@ -39,13 +40,18 @@ def stream():


def batch():
states = multi_turn_question.run_batch([
{"question_1": "What is the capital of the United States?",
"question_2": "List two local attractions."},

{"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?"},
])
states = multi_turn_question.run_batch(
[
{
"question_1": "What is the capital of the United States?",
"question_2": "List two local attractions.",
},
{
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)

for s in states:
print(s.messages())
Expand Down
19 changes: 10 additions & 9 deletions examples/quick_start/gemini_example_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@

@sgl.function
def few_shot_qa(s, question):
s += (
"""The following are questions with answers.
s += """The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
""")
"""
s += "Q: " + question + "\n"
s += "A:" + sgl.gen("answer", stop="\n", temperature=0)

Expand All @@ -33,19 +32,21 @@ def single():

def stream():
state = few_shot_qa.run(
question="What is the capital of the United States?",
stream=True)
question="What is the capital of the United States?", stream=True
)

for out in state.text_iter("answer"):
print(out, end="", flush=True)
print()


def batch():
states = few_shot_qa.run_batch([
{"question": "What is the capital of the United States?"},
{"question": "What is the capital of China?"},
])
states = few_shot_qa.run_batch(
[
{"question": "What is the capital of the United States?"},
{"question": "What is the capital of China?"},
]
)

for s in states:
print(s["answer"])
Expand Down
3 changes: 2 additions & 1 deletion examples/quick_start/gemini_example_multimodal_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export GCP_PROJECT_ID=******
python3 gemini_example_multimodal_chat.py
"""

import sglang as sgl


Expand All @@ -19,7 +20,7 @@ def image_qa(s, image_file1, image_file2, question):
image_file1="./images/cat.jpeg",
image_file2="./images/dog.jpeg",
question="Describe difference of the two images in one sentence.",
stream=True
stream=True,
)

for out in state.text_iter("answer"):
Expand Down
22 changes: 14 additions & 8 deletions examples/quick_start/openai_example_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export OPENAI_API_KEY=sk-******
python3 openai_example_chat.py
"""

import sglang as sgl


Expand Down Expand Up @@ -31,7 +32,7 @@ def stream():
state = multi_turn_question.run(
question_1="What is the capital of the United States?",
question_2="List two local attractions.",
stream=True
stream=True,
)

for out in state.text_iter():
Expand All @@ -40,13 +41,18 @@ def stream():


def batch():
states = multi_turn_question.run_batch([
{"question_1": "What is the capital of the United States?",
"question_2": "List two local attractions."},

{"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?"},
])
states = multi_turn_question.run_batch(
[
{
"question_1": "What is the capital of the United States?",
"question_2": "List two local attractions.",
},
{
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)

for s in states:
print(s.messages())
Expand Down
19 changes: 10 additions & 9 deletions examples/quick_start/openai_example_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@

@sgl.function
def few_shot_qa(s, question):
s += (
"""The following are questions with answers.
s += """The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
""")
"""
s += "Q: " + question + "\n"
s += "A:" + sgl.gen("answer", stop="\n", temperature=0)

Expand All @@ -33,19 +32,21 @@ def single():

def stream():
state = few_shot_qa.run(
question="What is the capital of the United States?",
stream=True)
question="What is the capital of the United States?", stream=True
)

for out in state.text_iter("answer"):
print(out, end="", flush=True)
print()


def batch():
states = few_shot_qa.run_batch([
{"question": "What is the capital of the United States?"},
{"question": "What is the capital of China?"},
])
states = few_shot_qa.run_batch(
[
{"question": "What is the capital of the United States?"},
{"question": "What is the capital of China?"},
]
)

for s in states:
print(s["answer"])
Expand Down
4 changes: 3 additions & 1 deletion examples/quick_start/openrouter_example_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
export OPENROUTER_API_KEY=sk-******
python3 openrouter_example_chat.py
"""
import sglang as sgl

import os

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
Expand Down
Loading