Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions python/sglang/backend/runtime_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,16 @@


class RuntimeEndpoint(BaseBackend):
def __init__(self, base_url, auth_token=None):
def __init__(self, base_url, auth_token=None, verify=None):
super().__init__()
self.support_concate_and_append = True

self.base_url = base_url
self.auth_token = auth_token
self.verify = verify

res = http_request(
self.base_url + "/get_model_info", auth_token=self.auth_token
self.base_url + "/get_model_info", auth_token=self.auth_token, verify=self.verify
)
assert res.status_code == 200
self.model_info = res.json()
Expand All @@ -40,6 +41,7 @@ def cache_prefix(self, prefix_str: str):
self.base_url + "/generate",
json={"text": prefix_str, "sampling_params": {"max_new_tokens": 0}},
auth_token=self.auth_token,
verify=self.verify
)
assert res.status_code == 200

Expand All @@ -48,14 +50,15 @@ def commit_lazy_operations(self, s: StreamExecutor):
self.base_url + "/generate",
json={"text": s.text_, "sampling_params": {"max_new_tokens": 0}},
auth_token=self.auth_token,
verify=self.verify
)
assert res.status_code == 200

def fill_image(self, s: StreamExecutor):
data = {"text": s.text_, "sampling_params": {"max_new_tokens": 0}}
self._add_images(s, data)
res = http_request(
self.base_url + "/generate", json=data, auth_token=self.auth_token
self.base_url + "/generate", json=data, auth_token=self.auth_token, verify=self.verify
)
assert res.status_code == 200

Expand Down Expand Up @@ -87,7 +90,7 @@ def generate(
self._add_images(s, data)

res = http_request(
self.base_url + "/generate", json=data, auth_token=self.auth_token
self.base_url + "/generate", json=data, auth_token=self.auth_token, verify=self.verify
)
obj = res.json()
comp = obj["text"]
Expand Down Expand Up @@ -126,6 +129,7 @@ def generate_stream(
json=data,
stream=True,
auth_token=self.auth_token,
verify=self.verify
)
pos = 0

Expand Down Expand Up @@ -157,7 +161,7 @@ def select(
data = {"text": s.text_, "sampling_params": {"max_new_tokens": 0}}
self._add_images(s, data)
res = http_request(
self.base_url + "/generate", json=data, auth_token=self.auth_token
self.base_url + "/generate", json=data, auth_token=self.auth_token, verify=self.verify
)
assert res.status_code == 200
prompt_len = res.json()["meta_info"]["prompt_tokens"]
Expand All @@ -171,7 +175,7 @@ def select(
}
self._add_images(s, data)
res = http_request(
self.base_url + "/generate", json=data, auth_token=self.auth_token
self.base_url + "/generate", json=data, auth_token=self.auth_token, verify=self.verify
)
assert res.status_code == 200
obj = res.json()
Expand All @@ -188,6 +192,7 @@ def concatenate_and_append(self, src_rids: List[str], dst_rid: str):
self.base_url + "/concate_and_append_request",
json={"src_rids": src_rids, "dst_rid": dst_rid},
auth_token=self.auth_token,
verify=self.verify
)
assert res.status_code == 200

Expand Down
8 changes: 4 additions & 4 deletions python/sglang/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,16 @@ def status_code(self):
return self.resp.status


def http_request(url, json=None, stream=False, auth_token=None):
def http_request(url, json=None, stream=False, auth_token=None, verify=None):
"""A faster version of requests.post with low-level urllib API."""
if stream:
if auth_token is None:
return requests.post(url, json=json, stream=True)
return requests.post(url, json=json, stream=True, verify=verify)
headers = {
"Content-Type": "application/json",
"Authentication": f"Bearer {auth_token}",
}
return requests.post(url, json=json, stream=True, headers=headers)
return requests.post(url, json=json, stream=True, headers=headers, verify=verify)
else:
req = urllib.request.Request(url)
req.add_header("Content-Type", "application/json; charset=utf-8")
Expand All @@ -107,7 +107,7 @@ def http_request(url, json=None, stream=False, auth_token=None):
data = None
else:
data = bytes(dumps(json), encoding="utf-8")
resp = urllib.request.urlopen(req, data=data)
resp = urllib.request.urlopen(req, data=data, cafile=verify)
return HttpResponse(resp)


Expand Down