diff --git a/Containerfile b/Containerfile
index 0136adb8..f98b6448 100644
--- a/Containerfile
+++ b/Containerfile
@@ -19,7 +19,7 @@ RUN pip3.12 install uv
 
 COPY ${LSC_SOURCE_DIR}/src ./src
 COPY ${LSC_SOURCE_DIR}/pyproject.toml ${LSC_SOURCE_DIR}/LICENSE ${LSC_SOURCE_DIR}/README.md ${LSC_SOURCE_DIR}/uv.lock ./
-RUN uv sync --locked --no-install-project --no-dev
+RUN uv sync --locked --no-dev
 
 
 # Final image without uv package manager
diff --git a/docker-compose.yaml b/docker-compose.yaml
index ee0c2cd7..e00ff5df 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -14,7 +14,9 @@ services:
       - lightspeednet
 
   lightspeed-stack:
-    image: quay.io/lightspeed-core/lightspeed-stack:latest
+    build:
+      context: .
+      dockerfile: Containerfile
     container_name: lightspeed-stack
     ports:
       - "8080:8080"
@@ -29,4 +31,4 @@
 
 networks:
   lightspeednet:
-    driver: bridge
\ No newline at end of file
+    driver: bridge
diff --git a/src/app/endpoints/metrics.py b/src/app/endpoints/metrics.py
index 9a8eb1a2..f386f259 100644
--- a/src/app/endpoints/metrics.py
+++ b/src/app/endpoints/metrics.py
@@ -7,10 +7,15 @@
     CONTENT_TYPE_LATEST,
 )
 
+from metrics.utils import setup_model_metrics
+
 router = APIRouter(tags=["metrics"])
 
 
 @router.get("/metrics", response_class=PlainTextResponse)
-def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
+async def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
     """Handle request to the /metrics endpoint."""
+    # Setup the model metrics if not already done. This is a one-time setup
+    # and will not be run again on subsequent calls to this endpoint
+    await setup_model_metrics()
     return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)
diff --git a/src/app/main.py b/src/app/main.py
index 6038398e..fcbb7f5c 100644
--- a/src/app/main.py
+++ b/src/app/main.py
@@ -10,7 +10,6 @@
 from configuration import configuration
 from log import get_logger
 import metrics
-from metrics.utils import setup_model_metrics
 from utils.common import register_mcp_servers_async
 import version
 
@@ -81,6 +80,4 @@ async def startup_event() -> None:
     logger.info("Registering MCP servers")
     await register_mcp_servers_async(logger, configuration.configuration)
     get_logger("app.endpoints.handlers")
-    logger.info("Setting up model metrics")
-    await setup_model_metrics()
     logger.info("App startup complete")
diff --git a/src/metrics/utils.py b/src/metrics/utils.py
index 0dcd7b50..cece371c 100644
--- a/src/metrics/utils.py
+++ b/src/metrics/utils.py
@@ -4,12 +4,15 @@
 from client import LlamaStackClientHolder, AsyncLlamaStackClientHolder
 from log import get_logger
 import metrics
+from utils.common import run_once_async
 
 logger = get_logger(__name__)
 
 
+@run_once_async
 async def setup_model_metrics() -> None:
     """Perform setup of all metrics related to LLM model and provider."""
+    logger.info("Setting up model metrics")
     model_list = []
     if configuration.llama_stack_configuration.use_as_library_client:
         model_list = await AsyncLlamaStackClientHolder().get_client().models.list()
@@ -48,3 +51,4 @@
             model_name,
             default_model_value,
         )
+    logger.info("Model metrics setup complete")
diff --git a/src/utils/common.py b/src/utils/common.py
index 001d2c3f..3f654ed5 100644
--- a/src/utils/common.py
+++ b/src/utils/common.py
@@ -1,10 +1,11 @@
 """Common utilities for the project."""
 
-from typing import Any, List, cast
+import asyncio
+from functools import wraps
 from logging import Logger
+from typing import Any, List, cast, Callable
 
 from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-
 from llama_stack.distribution.library_client import (
     AsyncLlamaStackAsLibraryClient,
 )
@@ -103,3 +104,18 @@ def _register_mcp_toolgroups_sync(
 
         client.toolgroups.register(**registration_params)
         logger.debug("MCP server %s registered successfully", mcp.name)
+
+
+def run_once_async(func: Callable) -> Callable:
+    """Decorate an async function to run only once."""
+    task = None
+
+    @wraps(func)
+    async def wrapper(*args: Any, **kwargs: Any) -> Any:
+        nonlocal task
+        if task is None:
+            loop = asyncio.get_running_loop()
+            task = loop.create_task(func(*args, **kwargs))
+        return await task
+
+    return wrapper
diff --git a/test.containerfile b/test.containerfile
index 4586fb59..b6b198a1 100644
--- a/test.containerfile
+++ b/test.containerfile
@@ -17,7 +17,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 
 RUN uv -h
 RUN uv venv && \
-    uv pip install llama-stack \
+    uv pip install llama-stack==0.2.16 \
     fastapi \
     opentelemetry-sdk \
     opentelemetry-exporter-otlp \
diff --git a/tests/e2e/features/rest_api.feature b/tests/e2e/features/rest_api.feature
index 541eab63..ec88fe7f 100644
--- a/tests/e2e/features/rest_api.feature
+++ b/tests/e2e/features/rest_api.feature
@@ -10,7 +10,7 @@ Feature: REST API tests
 
   Scenario: Check if service report proper readiness state
     Given the system is in default state
-    When I access REST API endpoint "readiness" using HTTP GET method
+    When I access endpoint "readiness" using HTTP GET method
     Then The status code of the response is 200
     And The body of the response has the following schema
     """
@@ -28,7 +28,7 @@
 
   Scenario: Check if service report proper liveness state
     Given the system is in default state
-    When I access REST API endpoint "liveness" using HTTP GET method
+    When I access endpoint "liveness" using HTTP GET method
    Then The status code of the response is 200
     And The body of the response has the following schema
     """
diff --git a/tests/unit/app/endpoints/test_metrics.py b/tests/unit/app/endpoints/test_metrics.py
index 19545541..1bddc3c6 100644
--- a/tests/unit/app/endpoints/test_metrics.py
+++ b/tests/unit/app/endpoints/test_metrics.py
@@ -3,15 +3,20 @@
 from app.endpoints.metrics import metrics_endpoint_handler
 
 
-def test_metrics_endpoint():
+async def test_metrics_endpoint(mocker):
     """Test the metrics endpoint handler."""
-    response = metrics_endpoint_handler(None)
+    mock_setup_metrics = mocker.patch(
+        "app.endpoints.metrics.setup_model_metrics", return_value=None
+    )
+    response = await metrics_endpoint_handler(None)
     assert response is not None
     assert response.status_code == 200
     assert "text/plain" in response.headers["Content-Type"]
 
     response_body = response.body.decode()
 
+    # Assert metrics were set up
+    mock_setup_metrics.assert_called_once()
     # Check if the response contains Prometheus metrics format
     assert "# TYPE ls_rest_api_calls_total counter" in response_body
     assert "# TYPE ls_response_duration_seconds histogram" in response_body