Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ RUN pip3.12 install uv
COPY ${LSC_SOURCE_DIR}/src ./src
COPY ${LSC_SOURCE_DIR}/pyproject.toml ${LSC_SOURCE_DIR}/LICENSE ${LSC_SOURCE_DIR}/README.md ${LSC_SOURCE_DIR}/uv.lock ./

RUN uv sync --locked --no-install-project --no-dev
RUN uv sync --locked --no-dev


# Final image without uv package manager
Expand Down
6 changes: 4 additions & 2 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ services:
- lightspeednet

lightspeed-stack:
image: quay.io/lightspeed-core/lightspeed-stack:latest
build:
context: .
dockerfile: Containerfile
container_name: lightspeed-stack
ports:
- "8080:8080"
Expand All @@ -29,4 +31,4 @@ services:

networks:
lightspeednet:
driver: bridge
driver: bridge
7 changes: 6 additions & 1 deletion src/app/endpoints/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@
CONTENT_TYPE_LATEST,
)

from metrics.utils import setup_model_metrics

router = APIRouter(tags=["metrics"])


@router.get("/metrics", response_class=PlainTextResponse)
async def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
    """Handle request to the /metrics endpoint.

    Lazily initializes the per-model metrics, then renders the current
    Prometheus registry in text exposition format.

    Returns:
        PlainTextResponse: the latest metrics snapshot with the standard
        Prometheus content type.
    """
    # Set up the model metrics if not already done. setup_model_metrics is
    # decorated with run_once_async, so this awaits the one-time setup on the
    # first request and is a cheap no-op on every subsequent call.
    await setup_model_metrics()
    return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)
3 changes: 0 additions & 3 deletions src/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from configuration import configuration
from log import get_logger
import metrics
from metrics.utils import setup_model_metrics
from utils.common import register_mcp_servers_async
import version

Expand Down Expand Up @@ -81,6 +80,4 @@ async def startup_event() -> None:
logger.info("Registering MCP servers")
await register_mcp_servers_async(logger, configuration.configuration)
get_logger("app.endpoints.handlers")
logger.info("Setting up model metrics")
await setup_model_metrics()
logger.info("App startup complete")
4 changes: 4 additions & 0 deletions src/metrics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
from client import LlamaStackClientHolder, AsyncLlamaStackClientHolder
from log import get_logger
import metrics
from utils.common import run_once_async

logger = get_logger(__name__)


@run_once_async
async def setup_model_metrics() -> None:
"""Perform setup of all metrics related to LLM model and provider."""
logger.info("Setting up model metrics")
model_list = []
if configuration.llama_stack_configuration.use_as_library_client:
model_list = await AsyncLlamaStackClientHolder().get_client().models.list()
Expand Down Expand Up @@ -48,3 +51,4 @@ async def setup_model_metrics() -> None:
model_name,
default_model_value,
)
logger.info("Model metrics setup complete")
20 changes: 18 additions & 2 deletions src/utils/common.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""Common utilities for the project."""

from typing import Any, List, cast
import asyncio
from functools import wraps
from logging import Logger
from typing import Any, List, cast, Callable

from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient

from llama_stack.distribution.library_client import (
AsyncLlamaStackAsLibraryClient,
)
Expand Down Expand Up @@ -103,3 +104,18 @@ def _register_mcp_toolgroups_sync(

client.toolgroups.register(**registration_params)
logger.debug("MCP server %s registered successfully", mcp.name)


def run_once_async(func: Callable[..., Any]) -> Callable[..., Any]:
    """Decorate an async function so its body executes at most once.

    The first call schedules ``func`` as a task on the running event loop;
    every call (including the first) awaits that same task, so concurrent
    callers all receive the result of the single execution. Arguments from
    calls after the first are ignored, since the cached task was created
    with the first call's arguments.

    Note: if ``func`` raises, the failed task is cached and the same
    exception is re-raised on every subsequent call — the function is not
    retried.

    Args:
        func: The coroutine function to guard.

    Returns:
        An async wrapper with the same call signature as ``func``.
    """
    # Cached task shared by all invocations of the wrapper. No lock is
    # needed: there is no await between the None-check and task creation,
    # so the check-and-set is atomic within a single event loop.
    task: asyncio.Task | None = None

    @wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> Any:
        nonlocal task
        if task is None:
            loop = asyncio.get_running_loop()
            task = loop.create_task(func(*args, **kwargs))
        return await task

    return wrapper
2 changes: 1 addition & 1 deletion test.containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN uv -h

RUN uv venv && \
uv pip install llama-stack \
uv pip install llama-stack==0.2.16 \
fastapi \
opentelemetry-sdk \
opentelemetry-exporter-otlp \
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/features/rest_api.feature
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Feature: REST API tests

Scenario: Check if service report proper readiness state
Given the system is in default state
When I access REST API endpoint "readiness" using HTTP GET method
When I access endpoint "readiness" using HTTP GET method
Then The status code of the response is 200
And The body of the response has the following schema
"""
Expand All @@ -28,7 +28,7 @@ Feature: REST API tests

Scenario: Check if service report proper liveness state
Given the system is in default state
When I access REST API endpoint "liveness" using HTTP GET method
When I access endpoint "liveness" using HTTP GET method
Then The status code of the response is 200
And The body of the response has the following schema
"""
Expand Down
9 changes: 7 additions & 2 deletions tests/unit/app/endpoints/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,20 @@
from app.endpoints.metrics import metrics_endpoint_handler


def test_metrics_endpoint():
async def test_metrics_endpoint(mocker):
"""Test the metrics endpoint handler."""
response = metrics_endpoint_handler(None)
mock_setup_metrics = mocker.patch(
"app.endpoints.metrics.setup_model_metrics", return_value=None
)
response = await metrics_endpoint_handler(None)
assert response is not None
assert response.status_code == 200
assert "text/plain" in response.headers["Content-Type"]

response_body = response.body.decode()

# Assert metrics were set up
mock_setup_metrics.assert_called_once()
# Check if the response contains Prometheus metrics format
assert "# TYPE ls_rest_api_calls_total counter" in response_body
assert "# TYPE ls_response_duration_seconds histogram" in response_body
Expand Down