Skip to content

Commit 528626e

Browse files
committed
Add model label to Component
1 parent fa4a7f1 commit 528626e

File tree

14 files changed

+76
-17
lines changed

14 files changed

+76
-17
lines changed

components/backends/vllm/src/dynamo/vllm/main.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,10 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
131131
"""
132132
Instantiate and serve
133133
"""
134-
component = runtime.namespace(config.namespace).component(config.component)
134+
135+
component = runtime.namespace(config.namespace).component(
136+
config.component, config.model
137+
)
135138
await component.create_service()
136139

137140
generate_endpoint = component.endpoint(config.endpoint)
@@ -164,7 +167,9 @@ async def init(runtime: DistributedRuntime, config: Config):
164167
Instantiate and serve
165168
"""
166169

167-
component = runtime.namespace(config.namespace).component(config.component)
170+
component = runtime.namespace(config.namespace).component(
171+
config.component, config.model
172+
)
168173
await component.create_service()
169174

170175
generate_endpoint = component.endpoint(config.endpoint)

components/metrics/src/bin/mock_worker.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ async fn backend(runtime: DistributedRuntime) -> Result<()> {
146146
let namespace = runtime.namespace("dynamo")?;
147147
// we must first create a service, then we can attach one more more endpoints
148148
let component = namespace
149-
.component("MyComponent")?
149+
.component("MyComponent", Some("MyModel".to_string()))?
150150
.service_builder()
151151
.create()
152152
.await?;

components/metrics/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ impl MetricsMode {
145145
pub struct LLMWorkerLoadCapacityConfig {
146146
pub component_name: String,
147147
pub endpoint_name: String,
148+
pub model_name: Option<String>,
148149
}
149150

150151
/// Metrics collector for exposing metrics to prometheus/grafana

components/metrics/src/main.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ struct Args {
6060
#[arg(long)]
6161
endpoint: String,
6262

63+
/// Model name for the target component (optional)
64+
#[arg(long)]
65+
model_name: String,
66+
6367
/// Polling interval in seconds for scraping dynamo endpoint stats (minimum 1 second)
6468
#[arg(long, default_value = "1")]
6569
poll_interval: u64,
@@ -109,6 +113,7 @@ fn get_config(args: &Args) -> Result<LLMWorkerLoadCapacityConfig> {
109113
Ok(LLMWorkerLoadCapacityConfig {
110114
component_name: args.component.clone(),
111115
endpoint_name: args.endpoint.clone(),
116+
model_name: Some(args.model_name.clone()),
112117
})
113118
}
114119

@@ -120,7 +125,9 @@ async fn app(runtime: Runtime) -> Result<()> {
120125
let drt = DistributedRuntime::from_settings(runtime.clone()).await?;
121126

122127
let namespace = drt.namespace(args.namespace)?;
123-
let component = namespace.component("count")?;
128+
// The metrics aggregator operates independently of any model,
129+
// hence the model name is set to None.
130+
let component = namespace.component("count", None)?;
124131

125132
// Create unique instance of Count
126133
let key = format!("{}/instance", component.etcd_root());
@@ -131,7 +138,7 @@ async fn app(runtime: Runtime) -> Result<()> {
131138
.await
132139
.context("Unable to create unique instance of Count; possibly one already exists")?;
133140

134-
let target_component = namespace.component(&config.component_name)?;
141+
let target_component = namespace.component(&config.component_name, config.model_name.clone())?;
135142
let target_endpoint = target_component.endpoint(&config.endpoint_name);
136143

137144
let service_path = target_endpoint.path();

components/router/src/main.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,11 @@ async fn app(runtime: Runtime) -> Result<()> {
6060
let args = Args::parse();
6161
let runtime = DistributedRuntime::from_settings(runtime).await?;
6262

63+
// The metrics aggregator operates independently of any model,
64+
// hence the model name is set to None.
6365
let component = runtime
6466
.namespace(&args.namespace)?
65-
.component(&args.component)?;
67+
.component(&args.component, None)?;
6668

6769
let selector = Box::new(CustomWorkerSelector::default());
6870

launch/dynamo-run/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ async fn engine_for(
138138
let endpoint = local_model.endpoint_id().clone();
139139

140140
let engine =
141-
dynamo_llm::mocker::engine::make_mocker_engine(drt, endpoint, args).await?;
141+
dynamo_llm::mocker::engine::make_mocker_engine(drt, endpoint, Some(local_model.card().service_name.clone()), args).await?;
142142

143143
Ok(EngineConfig::StaticCore {
144144
engine,

lib/bindings/c/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,9 @@ fn dynamo_create_kv_publisher(
147147
.ok_or(anyhow::Error::msg("Could not get Distributed Runtime"))
148148
{
149149
Ok(drt) => {
150-
let backend = drt.namespace(namespace)?.component(component)?;
150+
// The model publisher isn't tied to a specific model. Thus, the model name
151+
// is set to None.
152+
let backend = drt.namespace(namespace)?.component(component, None)?;
151153
KvEventPublisher::new(backend, worker_id, kv_block_size, None)
152154
}
153155
Err(e) => Err(e),

lib/bindings/python/rust/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,8 +535,9 @@ impl Endpoint {
535535

536536
#[pymethods]
537537
impl Namespace {
538-
fn component(&self, name: String) -> PyResult<Component> {
539-
let inner = self.inner.component(name).map_err(to_pyerr)?;
538+
#[pyo3(signature = (name, model = None))]
539+
fn component(&self, name: String, model: Option<String>) -> PyResult<Component> {
540+
let inner = self.inner.component(name, model).map_err(to_pyerr)?;
540541
Ok(Component {
541542
inner,
542543
event_loop: self.event_loop.clone(),

lib/bindings/python/rust/llm/entrypoint.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ async fn select_engine(
211211
let engine = dynamo_llm::mocker::engine::make_mocker_engine(
212212
distributed_runtime.inner,
213213
endpoint,
214+
Some(local_model.card().service_name.clone()),
214215
mocker_args,
215216
)
216217
.await?;

lib/llm/src/discovery/watcher.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ impl ModelWatcher {
169169
let component = self
170170
.drt
171171
.namespace(&endpoint_id.namespace)?
172-
.component(&endpoint_id.component)?;
172+
.component(&endpoint_id.component, Some(model_entry.name.clone()))?;
173173
let client = component.endpoint(&endpoint_id.name).client().await?;
174174

175175
let Some(etcd_client) = self.drt.etcd_client() else {

0 commit comments

Comments
 (0)