Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions python/sglang/backend/runtime_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@


class RuntimeEndpoint(BaseBackend):
def __init__(self, base_url):
def __init__(self, base_url, auth_token=None):
super().__init__()
self.support_concate_and_append = True

self.base_url = base_url
self.auth_token = auth_token

res = http_request(self.base_url + "/get_model_info")
res = http_request(self.base_url + "/get_model_info", auth_token=self.auth_token)
assert res.status_code == 200
self.model_info = res.json()

Expand All @@ -36,20 +37,22 @@ def cache_prefix(self, prefix_str: str):
res = http_request(
self.base_url + "/generate",
json={"text": prefix_str, "sampling_params": {"max_new_tokens": 0}},
auth_token=self.auth_token
)
assert res.status_code == 200

def commit_lazy_operations(self, s: StreamExecutor):
    """POST the executor's accumulated text to ``/generate`` with zero new tokens.

    The request body carries ``s.text_`` and ``max_new_tokens`` set to 0, and
    the endpoint's ``auth_token`` is forwarded to ``http_request``.

    Raises:
        AssertionError: if the server does not answer with HTTP 200.
    """
    endpoint = self.base_url + "/generate"
    payload = {"text": s.text_, "sampling_params": {"max_new_tokens": 0}}
    res = http_request(endpoint, json=payload, auth_token=self.auth_token)
    assert res.status_code == 200

def fill_image(self, s: StreamExecutor):
    """POST the executor's text (plus images) to ``/generate`` with zero new tokens.

    The payload is built from ``s.text_`` with ``max_new_tokens`` set to 0 and
    then handed to ``self._add_images(s, data)``, which presumably attaches the
    executor's image data in place (helper not visible here; confirm).

    Exactly one request is sent, and it carries ``auth_token``. The
    unauthenticated pre-change call is not kept alongside it; keeping it
    would issue a second, unauthenticated request whose result is discarded.

    Raises:
        AssertionError: if the server does not answer with HTTP 200.
    """
    data = {"text": s.text_, "sampling_params": {"max_new_tokens": 0}}
    self._add_images(s, data)
    res = http_request(
        self.base_url + "/generate",
        json=data,
        auth_token=self.auth_token,
    )
    assert res.status_code == 200

def generate(
Expand Down Expand Up @@ -79,7 +82,7 @@ def generate(

self._add_images(s, data)

res = http_request(self.base_url + "/generate", json=data)
res = http_request(self.base_url + "/generate", json=data, auth_token=self.auth_token)
obj = res.json()
comp = obj["text"]
return comp, obj["meta_info"]
Expand Down Expand Up @@ -112,7 +115,7 @@ def generate_stream(
data["stream"] = True
self._add_images(s, data)

response = http_request(self.base_url + "/generate", json=data, stream=True)
response = http_request(self.base_url + "/generate", json=data, stream=True, auth_token=self.auth_token)
pos = 0

incomplete_text = ""
Expand Down Expand Up @@ -142,7 +145,7 @@ def select(
# Cache common prefix
data = {"text": s.text_, "sampling_params": {"max_new_tokens": 0}}
self._add_images(s, data)
res = http_request(self.base_url + "/generate", json=data)
res = http_request(self.base_url + "/generate", json=data, auth_token=self.auth_token)
assert res.status_code == 200
prompt_len = res.json()["meta_info"]["prompt_tokens"]

Expand All @@ -154,7 +157,7 @@ def select(
"logprob_start_len": max(prompt_len - 2, 0),
}
self._add_images(s, data)
res = http_request(self.base_url + "/generate", json=data)
res = http_request(self.base_url + "/generate", json=data, auth_token=self.auth_token)
assert res.status_code == 200
obj = res.json()
normalized_prompt_logprob = [
Expand All @@ -169,6 +172,7 @@ def concatenate_and_append(self, src_rids: List[str], dst_rid: str):
res = http_request(
self.base_url + "/concate_and_append_request",
json={"src_rids": src_rids, "dst_rid": dst_rid},
auth_token=self.auth_token
)
assert res.status_code == 200

Expand Down
9 changes: 7 additions & 2 deletions python/sglang/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,18 @@ def status_code(self):
return self.resp.status


def http_request(url, json=None, stream=False):
def http_request(url, json=None, stream=False, auth_token=None):
"""A faster version of requests.post with low-level urllib API."""
if stream:
return requests.post(url, json=json, stream=True)
headers = {
"Content-Type": "application/json",
"Authentication": f"Bearer {auth_token}"
}
return requests.post(url, json=json, stream=True, headers={})
else:
req = urllib.request.Request(url)
req.add_header("Content-Type", "application/json; charset=utf-8")
req.add_header("Authentication", f"Bearer {auth_token}")
if json is None:
data = None
else:
Expand Down