From d59a5419a59a48b9c7ca6e722aadbe388275c80d Mon Sep 17 00:00:00 2001 From: Frost Ming Date: Fri, 12 Sep 2025 08:47:00 +0800 Subject: [PATCH 1/4] feat: implement reverse proxy for custom command services Signed-off-by: Frost Ming --- docs/source/build-with-bentoml/services.rst | 23 ++- src/_bentoml_impl/server/app.py | 29 +++- src/_bentoml_impl/server/proxy.py | 129 ++++++++++++++ src/_bentoml_impl/server/serving.py | 161 +++++++----------- src/_bentoml_sdk/service/config.py | 1 + .../_internal/configuration/v2/__init__.py | 1 + .../v2/default_configuration.yaml | 1 + src/bentoml/_internal/server/base_app.py | 15 ++ .../_internal/server/http/instruments.py | 22 +-- 9 files changed, 257 insertions(+), 125 deletions(-) create mode 100644 src/_bentoml_impl/server/proxy.py diff --git a/docs/source/build-with-bentoml/services.rst b/docs/source/build-with-bentoml/services.rst index 822e136e79f..3defd206716 100644 --- a/docs/source/build-with-bentoml/services.rst +++ b/docs/source/build-with-bentoml/services.rst @@ -306,7 +306,28 @@ Alternatively, compute the command at runtime: Use this method when there are parameters whose values can only be determined at runtime. -When a custom command is provided, BentoML will launch a single process for that Service using your command. It will set the ``PORT`` environment variable (and ``BENTOML_HOST``/``BENTOML_PORT`` for the entry Service). Your process must listen on the provided ``PORT`` and serve HTTP endpoints. Server-level options like CORS/SSL/timeouts defined in BentoML won't apply automatically—configure them in your own server if needed. +BentoML operates by establishing a proxy service that directs all requests to the HTTP server initiated by the custom command. The default proxy port is ``8000``, specify a different one if the custom command is listening on another port: + +.. code-block:: python + + @bentoml.service(cmd=["myserver", "--port", "$PORT"], http={"proxy_port": 9000}) + class ExternalServer: + pass + +Metrics Rewriting +----------------- + +When starting a server with a custom command, it can be helpful to include metrics from that server. Alternatively, you can modify the metrics provided by the Prometheus exporter. +To achieve this, you can implement the ``__metrics__`` method in your Service class. This method takes the original metrics text as input and returns the modified metrics text: + +.. code-block:: python + + @bentoml.service(cmd=["myserver", "--port", "$PORT"]) + class ExternalServer: + def __metrics__(self, original_metrics: str) -> str: + # Modify the original metrics as needed + modified_metrics = original_metrics.replace('sglang', 'vllm') + return modified_metrics .. _bentoml-tasks: diff --git a/src/_bentoml_impl/server/app.py b/src/_bentoml_impl/server/app.py index 100267ffdbe..43dfb2a0b37 100644 --- a/src/_bentoml_impl/server/app.py +++ b/src/_bentoml_impl/server/app.py @@ -19,6 +19,7 @@ from starlette.responses import Response from starlette.staticfiles import StaticFiles +from _bentoml_impl.server.proxy import create_proxy_app from _bentoml_sdk import Service from _bentoml_sdk.service import set_current_service from bentoml._internal.container import BentoMLContainer @@ -26,6 +27,7 @@ from bentoml._internal.resource import system_resources from bentoml._internal.server.base_app import BaseAppFactory from bentoml._internal.server.http_app import log_exception +from bentoml._internal.utils import is_async_callable from bentoml._internal.utils.metrics import exponential_buckets from bentoml.exceptions import BentoMLException from bentoml.exceptions import ServiceUnavailable @@ -178,6 +180,10 @@ def __call__(self) -> Starlette: app = super().__call__() app.add_route("/schema.json", self.schema_view, name="schema") + if self.service.has_custom_command(): + # This may obscure all the routes behind, but this is expected. + self.service.mount_asgi_app(create_proxy_app(self.service), name="proxy") + for mount_app, path, name in self.service.mount_apps: app.router.routes.append(PassiveMount(path, mount_app, name=name)) @@ -418,6 +424,22 @@ async def readyz(self, _: Request) -> Response: return PlainTextResponse("\n", status_code=200) + async def metrics(self, _: Request) -> Response: # type: ignore[override] + metrics_client = BentoMLContainer.metrics_client.get() + metrics_content = await anyio.to_thread.run_sync(metrics_client.generate_latest) + if hasattr(self.service.inner, "__metrics__"): + func = getattr(self._service_instance, "__metrics__") + if not is_async_callable(func): + func = functools.partial(anyio.to_thread.run_sync, func) + metrics_content = (await func(metrics_content.decode("utf-8"))).encode( + "utf-8" + ) + return Response( + metrics_content, + status_code=200, + media_type=metrics_client.CONTENT_TYPE_LATEST, + ) + @contextlib.asynccontextmanager async def lifespan(self, app: Starlette) -> t.AsyncGenerator[None, None]: from starlette.applications import Starlette @@ -430,12 +452,11 @@ async def lifespan(self, app: Starlette) -> t.AsyncGenerator[None, None]: for mount_app, *_ in self.service.mount_apps: if isinstance(mount_app, Starlette): - maybe_state = await stack.enter_async_context( + _ = await stack.enter_async_context( mount_app.router.lifespan_context(mount_app) ) - if maybe_state is not None: - mount_app.state.update(maybe_state) - # TODO: support other ASGI apps + else: + pass # TODO: support other ASGI apps yield async def schema_view(self, request: Request) -> Response: diff --git a/src/_bentoml_impl/server/proxy.py b/src/_bentoml_impl/server/proxy.py new file mode 100644 index 00000000000..bf4ad8c9342 --- /dev/null +++ b/src/_bentoml_impl/server/proxy.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +import contextlib +import logging +import typing as t + +import anyio +import httpx +from pyparsing import cast +from starlette.requests import Request + +from _bentoml_sdk import Service +from bentoml import get_current_service +from bentoml._internal.utils import expand_envs +from bentoml.exceptions import BentoMLConfigException + +if t.TYPE_CHECKING: + from starlette.applications import Starlette + +logger = logging.getLogger("bentoml.server") + + +async def _check_health(client: httpx.AsyncClient, health_endpoint: str) -> bool: + try: + response = await client.get(health_endpoint, timeout=5.0) + if response.status_code == 404: + raise BentoMLConfigException( + f"Health endpoint {health_endpoint} not found (404). Please make sure the health " + "endpoint is correctly configured in the service config." + ) + return response.is_success + except (httpx.HTTPError, httpx.RequestError): + return False + + +def create_proxy_app(service: Service[t.Any]) -> Starlette: + """A reverse-proxy that forwards all requests to the HTTP server started + by the custom command. + """ + import fastapi + from fastapi.responses import StreamingResponse + + health_endpoint = service.config.get("endpoints", {}).get("livez", "/health") + + @contextlib.asynccontextmanager + async def lifespan( + app: fastapi.FastAPI, + ) -> t.AsyncGenerator[dict[str, t.Any], None]: + server_instance = get_current_service() + assert server_instance is not None, "Current service is not initialized" + async with contextlib.AsyncExitStack() as stack: + if cmd_getter := getattr(server_instance, "__command__", None): + if not callable(cmd_getter): + raise TypeError( + f"__command__ must be a callable that returns a list of strings, got {type(cmd_getter)}" + ) + cmd = cast("list[str]", cmd_getter()) + else: + cmd = service.cmd + assert cmd is not None, "must have a command" + cmd = [expand_envs(c) for c in cmd] + logger.info("Running service with command: %s", " ".join(cmd)) + if ( + instance_client := getattr(server_instance, "client", None) + ) is not None and isinstance(instance_client, httpx.AsyncClient): + # TODO: support aiohttp client + client = instance_client + else: + proxy_port = service.config.get("http", {}).get("proxy_port") + if proxy_port is None: + raise BentoMLConfigException( + "proxy_port must be set in service config to use custom command" + ) + proxy_url = f"http://localhost:{proxy_port}" + client = await stack.enter_async_context( + httpx.AsyncClient(base_url=proxy_url, timeout=None) + ) + proc = await anyio.open_process(cmd, stdout=None, stderr=None) + while proc.returncode is None: + if await _check_health(client, health_endpoint): + break + await anyio.sleep(0.5) + else: + raise RuntimeError( + "Service process exited before becoming healthy, see the error above" + ) + + app.state.client = client + try: + state = {"proc": proc, "client": client} + service.context.state.update(state) + yield state + finally: + proc.terminate() + await proc.wait() + + assert service.has_custom_command(), "Service does not have custom command" + app = fastapi.FastAPI(lifespan=lifespan) + + # TODO: support websocket endpoints + @app.api_route( + "/{path:path}", + methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"], + ) + async def reverse_proxy(request: Request, path: str): + url = httpx.URL( + path=f"/{path}", query=request.url.query.encode("utf-8") or None + ) + client = t.cast(httpx.AsyncClient, app.state.client) + headers = dict(request.headers) + headers.pop("host", None) + req = client.build_request( + method=request.method, url=url, headers=headers, content=request.stream() + ) + try: + resp = await client.send(req, stream=True) + except httpx.ConnectError: + return fastapi.Response(503) + except httpx.RequestError: + return fastapi.Response(500) + + return StreamingResponse( + resp.aiter_raw(), + status_code=resp.status_code, + headers=resp.headers, + background=resp.aclose, + ) + + return app diff --git a/src/_bentoml_impl/server/serving.py b/src/_bentoml_impl/server/serving.py index a13b1ee0883..1db129cca94 100644 --- a/src/_bentoml_impl/server/serving.py +++ b/src/_bentoml_impl/server/serving.py @@ -68,7 +68,6 @@ def _get_server_socket( _SERVICE_WORKER_SCRIPT = "_bentoml_impl.worker.service" -_RUNNER_WORKER_SCRIPT = "_bentoml_impl.worker.runner" @inject @@ -87,39 +86,21 @@ def create_dependency_watcher( num_workers, worker_envs = scheduler.get_worker_env(svc) env = env or {} - if svc.has_custom_command(): - svc_port = port_stack.enter_context(reserve_free_port()) - env = {**os.environ, **env, "PORT": str(svc_port)} - uri = f"http://127.0.0.1:{svc_port}" - socket = None - cmd = sys.executable - args = [ - "-m", - _RUNNER_WORKER_SCRIPT, - bento_identifier, - "--service-name", - svc.name, - "--worker-id", - "$(CIRCUS.WID)", - "--args", - json.dumps(bento_args), - ] - else: - uri, socket = _get_server_socket(svc, uds_path, port_stack, backlog) - args = [ - "-m", - _SERVICE_WORKER_SCRIPT, - bento_identifier, - "--service-name", - svc.name, - "--fd", - f"$(circus.sockets.{svc.name})", - "--worker-id", - "$(CIRCUS.WID)", - "--args", - json.dumps(bento_args), - ] - cmd = sys.executable + uri, socket = _get_server_socket(svc, uds_path, port_stack, backlog) + args = [ + "-m", + _SERVICE_WORKER_SCRIPT, + bento_identifier, + "--service-name", + svc.name, + "--fd", + f"$(circus.sockets.{svc.name})", + "--worker-id", + "$(CIRCUS.WID)", + "--args", + json.dumps(bento_args), + ] + cmd = sys.executable env.update(worker_envs) watcher = create_watcher( @@ -288,74 +269,54 @@ def serve_http( raise BentoMLConfigException(f"Invalid host IP address: {host}") from e bento_args = BentoMLContainer.bento_arguments.get() env.update(worker_env) - if svc.has_custom_command(): - env.update(os.environ) - env.update( - {"PORT": str(port), "BENTOML_HOST": host, "BENTOML_PORT": str(port)} - ) - server_cmd = sys.executable - server_args = [ - "-m", - _RUNNER_WORKER_SCRIPT, - bento_identifier, - "--service-name", - svc.name, - "--worker-id", - "$(CIRCUS.WID)", - "--args", - json.dumps(bento_args), - ] - else: - sockets.append( - CircusSocket( - name=API_SERVER_NAME, - host=host, - port=port, - family=family, - backlog=backlog, - ) - ) - if BentoMLContainer.ssl.enabled.get() and not ssl_certfile: - raise BentoMLConfigException( - "ssl_certfile is required when ssl is enabled" - ) - - ssl_args = construct_ssl_args( - ssl_certfile=ssl_certfile, - ssl_keyfile=ssl_keyfile, - ssl_keyfile_password=ssl_keyfile_password, - ssl_version=ssl_version, - ssl_cert_reqs=ssl_cert_reqs, - ssl_ca_certs=ssl_ca_certs, - ssl_ciphers=ssl_ciphers, + sockets.append( + CircusSocket( + name=API_SERVER_NAME, + host=host, + port=port, + family=family, + backlog=backlog, ) - timeouts_args = construct_timeouts_args( - timeout_keep_alive=timeout_keep_alive, - timeout_graceful_shutdown=timeout_graceful_shutdown, - ) - timeout_args = ["--timeout", str(timeout)] if timeout else [] + ) + if BentoMLContainer.ssl.enabled.get() and not ssl_certfile: + raise BentoMLConfigException("ssl_certfile is required when ssl is enabled") + + ssl_args = construct_ssl_args( + ssl_certfile=ssl_certfile, + ssl_keyfile=ssl_keyfile, + ssl_keyfile_password=ssl_keyfile_password, + ssl_version=ssl_version, + ssl_cert_reqs=ssl_cert_reqs, + ssl_ca_certs=ssl_ca_certs, + ssl_ciphers=ssl_ciphers, + ) + timeouts_args = construct_timeouts_args( + timeout_keep_alive=timeout_keep_alive, + timeout_graceful_shutdown=timeout_graceful_shutdown, + ) + timeout_args = ["--timeout", str(timeout)] if timeout else [] - server_cmd = sys.executable - server_args = [ - "-m", - _SERVICE_WORKER_SCRIPT, - bento_identifier, - "--fd", - f"$(circus.sockets.{API_SERVER_NAME})", - "--service-name", - svc.name, - "--backlog", - str(backlog), - "--worker-id", - "$(CIRCUS.WID)", - "--args", - json.dumps(bento_args), - *ssl_args, - *timeouts_args, - *timeout_args, - ] - if development_mode: - server_args.append("--development-mode") + server_cmd = sys.executable + server_args = [ + "-m", + _SERVICE_WORKER_SCRIPT, + bento_identifier, + "--fd", + f"$(circus.sockets.{API_SERVER_NAME})", + "--service-name", + svc.name, + "--backlog", + str(backlog), + "--worker-id", + "$(CIRCUS.WID)", + "--args", + json.dumps(bento_args), + *ssl_args, + *timeouts_args, + *timeout_args, + ] + if development_mode: + server_args.append("--development-mode") scheme = "https" if BentoMLContainer.ssl.enabled.get() else "http" watchers.append( diff --git a/src/_bentoml_sdk/service/config.py b/src/_bentoml_sdk/service/config.py index 4d75df823b6..641abe94610 100644 --- a/src/_bentoml_sdk/service/config.py +++ b/src/_bentoml_sdk/service/config.py @@ -153,6 +153,7 @@ class HTTPCorsSchema(TypedDict, total=False): class HTTPSchema(TypedDict, total=False): host: IPvAnyAddress port: int + proxy_port: int cors: HTTPCorsSchema response: TypedDict("HTTPResponseSchema", {"trace_id": bool}, total=False) # type: ignore diff --git a/src/bentoml/_internal/configuration/v2/__init__.py b/src/bentoml/_internal/configuration/v2/__init__.py index db2d10a67f2..c9f7b29de08 100644 --- a/src/bentoml/_internal/configuration/v2/__init__.py +++ b/src/bentoml/_internal/configuration/v2/__init__.py @@ -121,6 +121,7 @@ s.Optional("http"): { "host": s.And(str, is_valid_ip_address), "port": s.And(int, ensure_larger_than_zero), + "proxy_port": s.And(int, ensure_larger_than_zero), "cors": { "enabled": bool, "access_control_allow_origins": s.Or([str], str, None), diff --git a/src/bentoml/_internal/configuration/v2/default_configuration.yaml b/src/bentoml/_internal/configuration/v2/default_configuration.yaml index af0d62ee962..f9744c7a701 100644 --- a/src/bentoml/_internal/configuration/v2/default_configuration.yaml +++ b/src/bentoml/_internal/configuration/v2/default_configuration.yaml @@ -46,6 +46,7 @@ services: http: host: 0.0.0.0 port: 3000 + proxy_port: 8000 cors: enabled: false access_control_allow_origins: ~ diff --git a/src/bentoml/_internal/server/base_app.py b/src/bentoml/_internal/server/base_app.py index d3eb07df416..f3a77d30f08 100644 --- a/src/bentoml/_internal/server/base_app.py +++ b/src/bentoml/_internal/server/base_app.py @@ -75,6 +75,17 @@ async def readyz(self, _: Request) -> Response: return PlainTextResponse("\n", status_code=200) raise HTTPException(500) + def metrics(self, _: Request) -> Response: + """The Prometheus metrics endpoint.""" + from ..configuration.containers import BentoMLContainer + + metrics_client = BentoMLContainer.metrics_client.get() + return Response( + metrics_client.generate_latest(), + status_code=200, + media_type=metrics_client.CONTENT_TYPE_LATEST, + ) + def __call__(self) -> Starlette: from starlette.applications import Starlette @@ -91,10 +102,14 @@ def __call__(self) -> Starlette: def routes(self) -> list[BaseRoute]: from starlette.routing import Route + from ..configuration.containers import BentoMLContainer + routes: list[BaseRoute] = [] routes.append(Route(path="/livez", name="livez", endpoint=self.livez)) routes.append(Route(path="/healthz", name="healthz", endpoint=self.livez)) routes.append(Route(path="/readyz", name="readyz", endpoint=self.readyz)) + if BentoMLContainer.api_server_config.metrics.enabled.get(): + routes.append(Route(path="/metrics", name="metrics", endpoint=self.metrics)) return routes @property diff --git a/src/bentoml/_internal/server/http/instruments.py b/src/bentoml/_internal/server/http/instruments.py index 9a5daa00288..9fb2cd5556d 100644 --- a/src/bentoml/_internal/server/http/instruments.py +++ b/src/bentoml/_internal/server/http/instruments.py @@ -89,21 +89,10 @@ async def __call__( ) -> None: if not self._is_setup: self._setup() - if not scope["type"].startswith("http"): + if not scope["type"].startswith("http") or scope["path"] == "/metrics": await self.app(scope, receive, send) return - if scope["path"] == "/metrics": - from starlette.responses import Response - - response = Response( - self.metrics_client.generate_latest(), - status_code=200, - media_type=self.metrics_client.CONTENT_TYPE_LATEST, - ) - await response(scope, receive, send) - return - endpoint = scope["path"] if not endpoint.startswith(tuple(self.skip_paths)): self.metrics_last_request_timestamp.labels( @@ -261,14 +250,7 @@ async def __call__( return if scope["type"].startswith("http") and scope["path"] == "/metrics": - from starlette.responses import Response - - response = Response( - self.metrics_client.generate_latest(), - status_code=200, - media_type=self.metrics_client.CONTENT_TYPE_LATEST, - ) - await response(scope, receive, send) + await self.app(scope, receive, send) return endpoint = scope["path"] From 0a26bfad9bcadb8c9684aa9e10380e0cc79f77a4 Mon Sep 17 00:00:00 2001 From: Frost Ming Date: Fri, 12 Sep 2025 09:39:04 +0800 Subject: [PATCH 2/4] fix: add tests Signed-off-by: Frost Ming --- src/_bentoml_impl/server/proxy.py | 6 +--- src/bentoml/_internal/server/base_app.py | 1 + .../_internal/utils/circus/__init__.py | 24 +++++++------ .../e2e/bento_new_sdk/test_custom_command.py | 27 ++++++++++++++ tests/e2e/fixtures/static_http/service.py | 36 +++++++++++++++++++ tests/e2e/fixtures/static_http/test.txt | 1 + 6 files changed, 79 insertions(+), 16 deletions(-) create mode 100644 tests/e2e/bento_new_sdk/test_custom_command.py create mode 100644 tests/e2e/fixtures/static_http/service.py create mode 100644 tests/e2e/fixtures/static_http/test.txt diff --git a/src/_bentoml_impl/server/proxy.py b/src/_bentoml_impl/server/proxy.py index bf4ad8c9342..21f79347514 100644 --- a/src/_bentoml_impl/server/proxy.py +++ b/src/_bentoml_impl/server/proxy.py @@ -66,11 +66,7 @@ async def lifespan( # TODO: support aiohttp client client = instance_client else: - proxy_port = service.config.get("http", {}).get("proxy_port") - if proxy_port is None: - raise BentoMLConfigException( - "proxy_port must be set in service config to use custom command" - ) + proxy_port = service.config.get("http", {}).get("proxy_port", 8000) proxy_url = f"http://localhost:{proxy_port}" client = await stack.enter_async_context( httpx.AsyncClient(base_url=proxy_url, timeout=None) diff --git a/src/bentoml/_internal/server/base_app.py b/src/bentoml/_internal/server/base_app.py index f3a77d30f08..453fb81e594 100644 --- a/src/bentoml/_internal/server/base_app.py +++ b/src/bentoml/_internal/server/base_app.py @@ -10,6 +10,7 @@ from starlette.exceptions import HTTPException from starlette.middleware import Middleware from starlette.responses import PlainTextResponse +from starlette.responses import Response from ..utils import with_app_arg diff --git a/src/bentoml/_internal/utils/circus/__init__.py b/src/bentoml/_internal/utils/circus/__init__.py index 71ede772eb0..ef9be2537a9 100644 --- a/src/bentoml/_internal/utils/circus/__init__.py +++ b/src/bentoml/_internal/utils/circus/__init__.py @@ -35,25 +35,27 @@ def stop(self) -> None: return super().stop() -class ThreadedArbiter(Arbiter, Thread): +class ThreadedArbiter(Arbiter): def __init__(self, *args: t.Any, **kwargs: t.Any) -> None: - Arbiter.__init__(self, *args, **kwargs) - Thread.__init__(self, daemon=True) - self.__cb: t.Optional[t.Callable[[t.Any], t.Any]] = None + super().__init__(*args, **kwargs) + self._thread: Thread | None = None def start(self, cb: t.Callable[[t.Any], t.Any] | None = None) -> None: - self.__cb = cb - Thread.start(self) + self._thread = Thread(target=self._worker, args=(cb,), daemon=True) + self._thread.start() - def run(self) -> None: + def _worker(self, cb: t.Callable[[t.Any], t.Any] | None = None) -> None: # reset the loop in thread self.loop = None - self.ctrl.loop = self._ensure_ioloop() # type: ignore[union-attr] - Arbiter.start(self, self.__cb) + self._ensure_ioloop() + self.ctrl.loop = self.loop # type: ignore[union-attr] + super().start(cb) def stop(self) -> None: - self.loop.add_callback(Arbiter.stop, self) # type: ignore[union-attr] - Thread.join(self) + if self.loop is not None: + self.loop.add_callback(super().stop) # type: ignore[union-attr] + if self._thread is not None: + self._thread.join() def create_circus_socket_from_uri( diff --git a/tests/e2e/bento_new_sdk/test_custom_command.py b/tests/e2e/bento_new_sdk/test_custom_command.py new file mode 100644 index 00000000000..f372d4f71da --- /dev/null +++ b/tests/e2e/bento_new_sdk/test_custom_command.py @@ -0,0 +1,27 @@ +from pathlib import Path + +import bentoml + + +def test_command_in_service_argument(examples: Path) -> None: + with bentoml.serve( + "service:StaticHTTP1", working_dir=str(examples / "static_http"), port=35679 + ) as server: + with bentoml.SyncHTTPClient(server.url, server_ready_timeout=100) as client: + resp = client.request("GET", "/test.txt") + assert resp.status_code == 200 + assert resp.text.strip() == "Hello world!" + + +def test_command_in_method(examples: Path) -> None: + with bentoml.serve( + "service:StaticHTTP2", working_dir=str(examples / "static_http"), port=35680 + ) as server: + with bentoml.SyncHTTPClient(server.url, server_ready_timeout=100) as client: + resp = client.request("GET", "/test.txt") + assert resp.status_code == 200 + assert resp.text.strip() == "Hello world!" + + resp = client.request("GET", "/metrics") + assert resp.status_code == 200 + assert "# HELLO from custom metrics" in resp.text diff --git a/tests/e2e/fixtures/static_http/service.py b/tests/e2e/fixtures/static_http/service.py new file mode 100644 index 00000000000..01b64d21e64 --- /dev/null +++ b/tests/e2e/fixtures/static_http/service.py @@ -0,0 +1,36 @@ +import sys +from pathlib import Path + +import bentoml + +THIS_DIR = Path(__file__).parent + + +StaticHTTP1 = bentoml.Service( + "StaticHTTP1", + cmd=[ + sys.executable, + "-m", + "http.server", + "8000", + "--directory", + str(THIS_DIR), + ], + config={"endpoints": {"livez": "/"}}, +) + + +@bentoml.service(endpoints={"livez": "/"}) +class StaticHTTP2: + def __command__(self) -> list[str]: + return [ + sys.executable, + "-m", + "http.server", + "8000", + "--directory", + str(THIS_DIR), + ] + + def __metrics__(self, content: str) -> str: + return f"{content}\n# HELLO from custom metrics\n" diff --git a/tests/e2e/fixtures/static_http/test.txt b/tests/e2e/fixtures/static_http/test.txt new file mode 100644 index 00000000000..cd0875583aa --- /dev/null +++ b/tests/e2e/fixtures/static_http/test.txt @@ -0,0 +1 @@ +Hello world! From 4ba214985ce85544f8b09edc3fd6c2c01f106260 Mon Sep 17 00:00:00 2001 From: Frost Ming Date: Fri, 12 Sep 2025 10:32:36 +0800 Subject: [PATCH 3/4] fix: clear module from sys.modules before importing service Signed-off-by: Frost Ming --- tests/e2e/bento_new_sdk/conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/e2e/bento_new_sdk/conftest.py b/tests/e2e/bento_new_sdk/conftest.py index 9992a2ec3df..afeea55abc2 100644 --- a/tests/e2e/bento_new_sdk/conftest.py +++ b/tests/e2e/bento_new_sdk/conftest.py @@ -23,3 +23,8 @@ def prepare_models() -> None: @pytest.fixture def examples() -> Path: return EXAMPLE_DIR + + +@pytest.fixture(autouse=True) +def clear_import_cache() -> None: + sys.modules.pop("service", None) From 8182ff9d75a7ca6d19d586a096b8382f9ae176a7 Mon Sep 17 00:00:00 2001 From: Frost Ming Date: Tue, 16 Sep 2025 10:54:16 +0800 Subject: [PATCH 4/4] feat: add support for Quart lifespan hooks Signed-off-by: Frost Ming --- src/_bentoml_impl/server/app.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/_bentoml_impl/server/app.py b/src/_bentoml_impl/server/app.py index 43dfb2a0b37..6e4ed84715b 100644 --- a/src/_bentoml_impl/server/app.py +++ b/src/_bentoml_impl/server/app.py @@ -27,6 +27,7 @@ from bentoml._internal.resource import system_resources from bentoml._internal.server.base_app import BaseAppFactory from bentoml._internal.server.http_app import log_exception +from bentoml._internal.types import LazyType from bentoml._internal.utils import is_async_callable from bentoml._internal.utils.metrics import exponential_buckets from bentoml.exceptions import BentoMLException @@ -452,9 +453,12 @@ async def lifespan(self, app: Starlette) -> t.AsyncGenerator[None, None]: for mount_app, *_ in self.service.mount_apps: if isinstance(mount_app, Starlette): - _ = await stack.enter_async_context( + await stack.enter_async_context( mount_app.router.lifespan_context(mount_app) ) + elif LazyType("quart.Quart").isinstance(mount_app): + await mount_app.startup() # type: ignore[attr-defined] + stack.push_async_callback(mount_app.shutdown) # type: ignore[attr-defined] else: pass # TODO: support other ASGI apps yield