Skip to content
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
2f44c26
initial impl
tedzhouhk Jun 6, 2025
1e67375
doc
tedzhouhk Jun 6, 2025
609bd8a
add connector to sla-planner
tedzhouhk Jun 9, 2025
cf5a27b
bug fix
tedzhouhk Jun 10, 2025
dcfa9f6
add note
tedzhouhk Jun 10, 2025
cf6f825
correct note
tedzhouhk Jun 10, 2025
053d703
pc
tedzhouhk Jun 10, 2025
d55b990
pc
tedzhouhk Jun 10, 2025
5c60e46
mypy
tedzhouhk Jun 10, 2025
3e8b88b
mypy
tedzhouhk Jun 10, 2025
3709874
Merge branch 'main' of https://github.com/ai-dynamo/dynamo into hzhou…
tedzhouhk Jun 10, 2025
6cb95b0
Merge branch 'main' into hzhou/sla_planner_v2
tedzhouhk Jun 11, 2025
5d0fef6
Update docs/architecture/planner.md
tedzhouhk Jun 12, 2025
0f2cd44
Merge branch 'main' of https://github.com/ai-dynamo/dynamo into hzhou…
tedzhouhk Jun 12, 2025
6f69a79
stash
tedzhouhk Jun 12, 2025
29380ee
Merge branch 'hzhou/sla_planner_v2' of https://github.com/ai-dynamo/d…
tedzhouhk Jun 12, 2025
2aae82e
stage
tedzhouhk Jun 12, 2025
fa6702c
finish refactor
tedzhouhk Jun 12, 2025
19d1fc5
update doc
tedzhouhk Jun 12, 2025
90b1267
address PR comments
tedzhouhk Jun 13, 2025
cb0d218
correct interpolation
tedzhouhk Jun 13, 2025
62e1328
add comment for forecasting model
tedzhouhk Jun 13, 2025
94e9691
use dataclass
tedzhouhk Jun 13, 2025
b6f3161
better code quality
tedzhouhk Jun 13, 2025
66b8402
fix init.py
tedzhouhk Jun 13, 2025
52787de
fix code for debug
tedzhouhk Jun 13, 2025
a4acd2b
fix typo
tedzhouhk Jun 13, 2025
e024c59
Merge branch 'main' of https://github.com/ai-dynamo/dynamo into hzhou…
tedzhouhk Jun 13, 2025
9c66f11
pc
tedzhouhk Jun 13, 2025
6a9437e
Update components/planner/src/dynamo/planner/prometheus.py
tedzhouhk Jun 13, 2025
b08ddfd
Merge branch 'main' of https://github.com/ai-dynamo/dynamo into hzhou…
tedzhouhk Jun 13, 2025
41e7d23
Update docs/architecture/planner.md
tedzhouhk Jun 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,7 @@ def benchmark_decode(isl, osl, num_request, genai_perf_artifact_dir, model_name,
y_context_length=np.array(y_context_length),
z_itl=np.array(z_itl),
z_thpt_per_gpu=np.array(z_thpt_per_gpu),
max_kv_tokens=np.array([max_kv_tokens]),
)
logger.info(f"Saved data points to {save_path}")

Expand Down
5 changes: 3 additions & 2 deletions components/planner/src/dynamo/planner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
"LocalConnector",
"PlannerConnector",
"KubernetesConnector",
"PlannerDefaults",
"LoadPlannerDefaults",
"SLAPlannerDefaults",
]

# Import the classes
from dynamo.planner.circusd import CircusController
from dynamo.planner.defaults import PlannerDefaults
from dynamo.planner.defaults import LoadPlannerDefaults, SLAPlannerDefaults
from dynamo.planner.kubernetes_connector import KubernetesConnector
from dynamo.planner.local_connector import LocalConnector
from dynamo.planner.planner_connector import PlannerConnector
4 changes: 4 additions & 0 deletions components/planner/src/dynamo/planner/circusd.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ async def remove_watcher(
waiting: bool = True,
max_retries: int = 3,
retry_delay: float = 2.0,
blocking: bool = True,
timeout: int = 600, # 10 minutes
) -> bool:
"""
Expand All @@ -185,8 +186,11 @@ async def remove_watcher(
response = self.client.send_message("signal", name=name, signum="SIGTERM")
if response.get("status") != "ok":
logger.warning(f"Failed to send SIGTERM to {name}: {response}")
if not blocking:
return True
except Exception as e:
logger.warning(f"Error sending SIGTERM to {name}: {e}")
return False

# Now wait for the process to exit gracefully
exited = await self._wait_for_process_graceful_exit(name, timeout)
Expand Down
26 changes: 20 additions & 6 deletions components/planner/src/dynamo/planner/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,32 @@


# Source of truth for planner defaults
class PlannerDefaults:
class BasePlannerDefaults:
namespace = "dynamo"
environment = "local"
no_operation = False
log_dir = None
adjustment_interval = 10
metric_pulling_interval = 1
adjustment_interval = 180 # in seconds
max_gpu_budget = 8
min_endpoint = 1
min_endpoint = 1 # applies to both decode and prefill
decode_engine_num_gpu = 1
prefill_engine_num_gpu = 1


class LoadPlannerDefaults(BasePlannerDefaults):
metric_pulling_interval = 10 # in seconds
decode_kv_scale_up_threshold = 0.9
decode_kv_scale_down_threshold = 0.5
prefill_queue_scale_up_threshold = 5.0
prefill_queue_scale_down_threshold = 0.2
decode_engine_num_gpu = 1
prefill_engine_num_gpu = 1


class SLAPlannerDefaults(BasePlannerDefaults):
prometheus_endpoint = "http://localhost:9090"
profile_results_dir = "profiling_results"
isl = 3000 # in number of tokens
osl = 150 # in number of tokens
ttft = 0.5 # in seconds
itl = 0.05 # in seconds
load_predictor = "arima" # ["constant", "arima", "prophet"]
load_prediction_window_size = 50 # predict load using how many recent load samples
9 changes: 6 additions & 3 deletions components/planner/src/dynamo/planner/local_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,13 @@ async def remove_component(
target_watcher = matching_components[highest_suffix]
logger.info(f"Removing watcher {target_watcher}")

success = await self.circus.remove_watcher(name=target_watcher)
logger.info(
f"Circus remove_watcher for {target_watcher} {'succeeded' if success else 'failed'}"
success = await self.circus.remove_watcher(
name=target_watcher, blocking=blocking
)
if not blocking:
logger.info(
f"Circus remove_watcher for {target_watcher} {'succeeded' if success else 'failed'}"
)

if success:
if highest_suffix > 0: # Numbered watcher - remove entire entry
Expand Down
115 changes: 115 additions & 0 deletions components/planner/src/dynamo/planner/planner_sla.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import asyncio
import logging

from pydantic import BaseModel

from dynamo.planner.defaults import SLAPlannerDefaults
from dynamo.planner.utils.planner_core import start_sla_planner
from dynamo.runtime.logging import configure_dynamo_logging
from dynamo.sdk import async_on_start, dynamo_context, endpoint, service
from dynamo.sdk.core.protocol.interface import ComponentType
from dynamo.sdk.lib.config import ServiceConfig
from dynamo.sdk.lib.image import DYNAMO_IMAGE

logger = logging.getLogger(__name__)

# start planner 30 seconds after the other components to make sure planner can see them
# TODO: remove this delay
INIT_PLANNER_START_DELAY = 30


class RequestType(BaseModel):
text: str


@service(
dynamo={
"namespace": "dynamo",
"component_type": ComponentType.PLANNER,
},
resources={"cpu": "10", "memory": "20Gi"},
workers=1,
image=DYNAMO_IMAGE,
)
class Planner:
def __init__(self):
configure_dynamo_logging(service_name="Planner")
logger.info("Starting planner")
self.runtime = dynamo_context["runtime"]

config = ServiceConfig.get_instance()

# Get namespace directly from dynamo_context as it contains the active namespace
self.namespace = dynamo_context["namespace"]
config_instance = config.get("Planner", {})

self.args = argparse.Namespace(
namespace=self.namespace,
environment=config_instance.get(
"environment", SLAPlannerDefaults.environment
),
no_operation=config_instance.get(
"no-operation", SLAPlannerDefaults.no_operation
),
log_dir=config_instance.get("log-dir", SLAPlannerDefaults.log_dir),
adjustment_interval=config_instance.get(
"adjustment-interval", SLAPlannerDefaults.adjustment_interval
),
max_gpu_budget=config_instance.get(
"max-gpu-budget", SLAPlannerDefaults.max_gpu_budget
),
min_endpoint=config_instance.get(
"min-endpoint", SLAPlannerDefaults.min_endpoint
),
decode_engine_num_gpu=config_instance.get(
"decode-engine-num-gpu", SLAPlannerDefaults.decode_engine_num_gpu
),
prefill_engine_num_gpu=config_instance.get(
"prefill-engine-num-gpu", SLAPlannerDefaults.prefill_engine_num_gpu
),
prometheus_endpoint=config_instance.get(
"prometheus-endpoint", SLAPlannerDefaults.prometheus_endpoint
),
profile_results_dir=config_instance.get(
"profile-results-dir", SLAPlannerDefaults.profile_results_dir
),
isl=config_instance.get("isl", SLAPlannerDefaults.isl),
osl=config_instance.get("osl", SLAPlannerDefaults.osl),
ttft=config_instance.get("ttft", SLAPlannerDefaults.ttft),
itl=config_instance.get("itl", SLAPlannerDefaults.itl),
load_predictor=config_instance.get(
"load-predictor", SLAPlannerDefaults.load_predictor
),
load_prediction_window_size=config_instance.get(
"load-prediction-window-size",
SLAPlannerDefaults.load_prediction_window_size,
),
)

@async_on_start
async def async_init(self):
await asyncio.sleep(INIT_PLANNER_START_DELAY)
logger.info("Calling start_planner")
await start_sla_planner(self.runtime, self.args)
logger.info("Planner started")

@endpoint()
async def generate(self, request: RequestType):
"""Dummy endpoint to satisfy that each component has an endpoint"""
yield "mock endpoint"
67 changes: 67 additions & 0 deletions components/planner/src/dynamo/planner/prometheus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import subprocess
import tempfile

import yaml

from dynamo.sdk import service
from dynamo.sdk.lib.config import ServiceConfig
from dynamo.sdk.lib.image import DYNAMO_IMAGE

logger = logging.getLogger(__name__)


@service(
dynamo={
"namespace": "dynamo",
},
workers=1,
image=DYNAMO_IMAGE,
)
class Prometheus:
def __init__(self):
"""Initialize Frontend service with HTTP server and model configuration."""
self.config = ServiceConfig.get_parsed_config("Prometheus")
self.process = None

logger.warning(f"Prometheus config: {self.config}")

self.start_prometheus_server()

def start_prometheus_server(self):
logger.info("Starting prometheus server...")

self.temp_file = tempfile.NamedTemporaryFile(
mode="w", suffix=".yml", delete=False
)
yaml.dump(self.config, self.temp_file)
self.temp_file.close()
config_path = self.temp_file.name

cmd = [
"prometheus",
f"--config.file={config_path}",
]

logger.info(f"Prometheus cmd: {cmd}")

self.process = subprocess.Popen(
cmd,
stdout=None,
stderr=None,
)
14 changes: 14 additions & 0 deletions components/planner/src/dynamo/planner/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading