diff --git a/Containerfile b/Containerfile
index 0136adb8..f98b6448 100644
--- a/Containerfile
+++ b/Containerfile
@@ -19,7 +19,7 @@ RUN pip3.12 install uv
 
 COPY ${LSC_SOURCE_DIR}/src ./src
 COPY ${LSC_SOURCE_DIR}/pyproject.toml ${LSC_SOURCE_DIR}/LICENSE ${LSC_SOURCE_DIR}/README.md ${LSC_SOURCE_DIR}/uv.lock ./
-RUN uv sync --locked --no-install-project --no-dev
+RUN uv sync --locked --no-dev
 
 
 # Final image without uv package manager
diff --git a/docker-compose.yaml b/docker-compose.yaml
index ee0c2cd7..e00ff5df 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -14,7 +14,9 @@ services:
       - lightspeednet
 
   lightspeed-stack:
-    image: quay.io/lightspeed-core/lightspeed-stack:latest
+    build:
+      context: .
+      dockerfile: Containerfile
     container_name: lightspeed-stack
     ports:
       - "8080:8080"
@@ -29,4 +31,4 @@
 
 networks:
   lightspeednet:
-    driver: bridge
\ No newline at end of file
+    driver: bridge
diff --git a/src/app/endpoints/metrics.py b/src/app/endpoints/metrics.py
index 9a8eb1a2..f386f259 100644
--- a/src/app/endpoints/metrics.py
+++ b/src/app/endpoints/metrics.py
@@ -7,10 +7,15 @@
     CONTENT_TYPE_LATEST,
 )
 
+from metrics.utils import setup_model_metrics
+
 router = APIRouter(tags=["metrics"])
 
 
 @router.get("/metrics", response_class=PlainTextResponse)
-def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
+async def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
     """Handle request to the /metrics endpoint."""
+    # Setup the model metrics if not already done. This is a one-time setup
+    # and will not be run again on subsequent calls to this endpoint
+    await setup_model_metrics()
     return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)
diff --git a/src/app/main.py b/src/app/main.py
index 6038398e..fcbb7f5c 100644
--- a/src/app/main.py
+++ b/src/app/main.py
@@ -10,7 +10,6 @@
 from configuration import configuration
 from log import get_logger
 import metrics
-from metrics.utils import setup_model_metrics
 from utils.common import register_mcp_servers_async
 import version
 
@@ -81,6 +80,4 @@ async def startup_event() -> None:
     logger.info("Registering MCP servers")
     await register_mcp_servers_async(logger, configuration.configuration)
     get_logger("app.endpoints.handlers")
-    logger.info("Setting up model metrics")
-    await setup_model_metrics()
     logger.info("App startup complete")
diff --git a/src/metrics/utils.py b/src/metrics/utils.py
index 0dcd7b50..cece371c 100644
--- a/src/metrics/utils.py
+++ b/src/metrics/utils.py
@@ -4,12 +4,15 @@
 from client import LlamaStackClientHolder, AsyncLlamaStackClientHolder
 from log import get_logger
 import metrics
+from utils.common import run_once_async
 
 logger = get_logger(__name__)
 
 
+@run_once_async
 async def setup_model_metrics() -> None:
     """Perform setup of all metrics related to LLM model and provider."""
+    logger.info("Setting up model metrics")
     model_list = []
     if configuration.llama_stack_configuration.use_as_library_client:
         model_list = await AsyncLlamaStackClientHolder().get_client().models.list()
@@ -48,3 +51,4 @@
             model_name,
             default_model_value,
         )
+    logger.info("Model metrics setup complete")
diff --git a/src/utils/common.py b/src/utils/common.py
index 001d2c3f..3f654ed5 100644
--- a/src/utils/common.py
+++ b/src/utils/common.py
@@ -1,10 +1,11 @@
 """Common utilities for the project."""
 
-from typing import Any, List, cast
+import asyncio
+from functools import wraps
 from logging import Logger
+from typing import Any, List, cast, Callable
 
 from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-
 from llama_stack.distribution.library_client import (
     AsyncLlamaStackAsLibraryClient,
 )
@@ -103,3 +104,18 @@ def _register_mcp_toolgroups_sync(
 
         client.toolgroups.register(**registration_params)
         logger.debug("MCP server %s registered successfully", mcp.name)
+
+
+def run_once_async(func: Callable) -> Callable:
+    """Decorate an async function to run only once."""
+    task = None
+
+    @wraps(func)
+    async def wrapper(*args: Any, **kwargs: Any) -> Any:
+        nonlocal task
+        if task is None:
+            loop = asyncio.get_running_loop()
+            task = loop.create_task(func(*args, **kwargs))
+        return await task
+
+    return wrapper
diff --git a/test.containerfile b/test.containerfile
index 4586fb59..b6b198a1 100644
--- a/test.containerfile
+++ b/test.containerfile
@@ -17,7 +17,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 
 RUN uv -h
 RUN uv venv && \
-    uv pip install llama-stack \
+    uv pip install llama-stack==0.2.16 \
     fastapi \
     opentelemetry-sdk \
     opentelemetry-exporter-otlp \
diff --git a/tests/e2e/features/rest_api.feature b/tests/e2e/features/rest_api.feature
index 541eab63..ec88fe7f 100644
--- a/tests/e2e/features/rest_api.feature
+++ b/tests/e2e/features/rest_api.feature
@@ -10,7 +10,7 @@ Feature: REST API tests
 
   Scenario: Check if service report proper readiness state
     Given the system is in default state
-    When I access REST API endpoint "readiness" using HTTP GET method
+    When I access endpoint "readiness" using HTTP GET method
     Then The status code of the response is 200
     And The body of the response has the following schema
     """
@@ -28,7 +28,7 @@
 
   Scenario: Check if service report proper liveness state
     Given the system is in default state
-    When I access REST API endpoint "liveness" using HTTP GET method
+    When I access endpoint "liveness" using HTTP GET method
    Then The status code of the response is 200
     And The body of the response has the following schema
     """
diff --git a/tests/unit/app/endpoints/test_metrics.py b/tests/unit/app/endpoints/test_metrics.py
index 19545541..1bddc3c6 100644
--- a/tests/unit/app/endpoints/test_metrics.py
+++ b/tests/unit/app/endpoints/test_metrics.py
@@ -3,15 +3,20 @@
 from app.endpoints.metrics import metrics_endpoint_handler
 
 
-def test_metrics_endpoint():
+async def test_metrics_endpoint(mocker):
     """Test the metrics endpoint handler."""
-    response = metrics_endpoint_handler(None)
+    mock_setup_metrics = mocker.patch(
+        "app.endpoints.metrics.setup_model_metrics", return_value=None
+    )
+    response = await metrics_endpoint_handler(None)
     assert response is not None
     assert response.status_code == 200
     assert "text/plain" in response.headers["Content-Type"]
 
     response_body = response.body.decode()
 
+    # Assert metrics were set up
+    mock_setup_metrics.assert_called_once()
     # Check if the response contains Prometheus metrics format
     assert "# TYPE ls_rest_api_calls_total counter" in response_body
     assert "# TYPE ls_response_duration_seconds histogram" in response_body