Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
"router_queue_threshold",
"router_event_threads",
"router_queue_policy",
"remote_indexer_component",
"use_remote_indexer",
"serve_indexer",
)


Expand All @@ -61,7 +62,8 @@ class KvRouterConfigBase(ConfigBase):
router_queue_threshold: Optional[float]
router_event_threads: int
router_queue_policy: str
remote_indexer_component: Optional[str]
use_remote_indexer: bool = False
serve_indexer: bool = False

def kv_router_kwargs(self) -> dict:
"""Return a dict suitable for ``KvRouterConfig(**kwargs)``."""
Expand Down Expand Up @@ -286,15 +288,14 @@ def add_arguments(self, parser) -> None:
arg_type=str,
choices=["fcfs", "wspt"],
)
add_argument(
add_negatable_bool_argument(
g,
flag_name="--remote-indexer-component",
env_var="DYN_REMOTE_INDEXER_COMPONENT",
default=None,
flag_name="--use-remote-indexer",
env_var="DYN_USE_REMOTE_INDEXER",
default=False,
help=(
"[EXPERIMENTAL] KV Router: Component name of a standalone KV indexer to use for overlap scoring. "
"When set, the router queries the standalone indexer via the request plane instead "
"of maintaining a local radix tree (e.g. 'kv-indexer')."
"[EXPERIMENTAL] KV Router: Query a remote KV indexer served from the worker "
"component via the request plane instead of maintaining a local radix tree."
),
arg_type=str,
dest="use_remote_indexer",
)
15 changes: 15 additions & 0 deletions components/src/dynamo/frontend/frontend_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ def validate(self) -> None:
"--router-prefill-load-model=aic requires "
"--router-track-prefill-tokens"
)
if self.serve_indexer:
if self.router_mode != "kv":
raise ValueError("--serve-indexer requires --router-mode=kv")
if self.use_remote_indexer:
raise ValueError(
"--serve-indexer and --use-remote-indexer are mutually exclusive"
)


@register_encoder(FrontendConfig)
Expand Down Expand Up @@ -193,6 +200,14 @@ def add_arguments(self, parser) -> None:
help="HTTP port for the engine (u16).",
arg_type=int,
)
add_negatable_bool_argument(
g,
flag_name="--serve-indexer",
env_var="DYN_SERVE_INDEXER",
default=False,
help="Serve this frontend's local KV indexers over the request plane.",
dest="serve_indexer",
)
add_argument(
g,
flag_name="--tls-cert-path",
Expand Down
16 changes: 15 additions & 1 deletion components/src/dynamo/router/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
KvRouterArgGroup,
KvRouterConfigBase,
)
from dynamo.common.configuration.utils import add_argument
from dynamo.common.configuration.utils import add_argument, add_negatable_bool_argument
from dynamo.llm import AicPerfConfig, KvRouterConfig


Expand All @@ -25,6 +25,7 @@ class DynamoRouterConfig(KvRouterConfigBase, AicPerfConfigBase):
namespace: str
endpoint: str
router_block_size: int
serve_indexer: bool = False

def validate(self) -> None:
"""Validate config invariants (aligned with Rust KvRouterConfig where applicable)."""
Expand All @@ -40,6 +41,10 @@ def validate(self) -> None:
"Expected format: namespace.component.endpoint"
)
self.namespace = parts[0]
if self.serve_indexer and self.use_remote_indexer:
raise ValueError(
"--serve-indexer and --use-remote-indexer are mutually exclusive"
)
if self.router_prefill_load_model == "aic":
missing = [
flag
Expand Down Expand Up @@ -89,6 +94,15 @@ def add_arguments(self, parser) -> None:
obsolete_flag="--block-size",
)

add_negatable_bool_argument(
g,
flag_name="--serve-indexer",
env_var="DYN_SERVE_INDEXER",
default=False,
help="Serve this router's local KV indexer over the request plane.",
dest="serve_indexer",
)

# KV router options (shared with dynamo.frontend)
KvRouterArgGroup().add_arguments(parser)
AicPerfArgGroup().add_arguments(parser)
Expand Down
4 changes: 2 additions & 2 deletions container/templates/wheel_builder.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -439,9 +439,9 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
maturin build --release --features "media-ffmpeg,kv-indexer,kv-indexer-runtime" --out /opt/dynamo/dist; \
maturin build --release --features "media-ffmpeg,kv-indexer" --out /opt/dynamo/dist; \
else \
maturin build --release --features "kv-indexer,kv-indexer-runtime" --out /opt/dynamo/dist; \
maturin build --release --features "kv-indexer" --out /opt/dynamo/dist; \
fi && \
/tmp/use-sccache.sh show-stats "Dynamo Runtime"

Expand Down
61 changes: 60 additions & 1 deletion docs/components/router/router-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ When using KV routing, the router needs to know what each worker has cached. The
|------------|---------------|-------------|
| **NATS Core (local indexer)** | Default (no extra flags) | Workers maintain a local indexer; router queries workers on startup and receives events via NATS Core |
| **JetStream (durable)** | `--router-durable-kv-events` | Events persisted in NATS JetStream; supports snapshots and durable consumers. *Deprecated.* |
| **ZMQ** | `--event-plane zmq` | Workers publish via ZMQ PUB sockets; standalone indexer aggregates events |
| **ZMQ** | `--event-plane zmq` | Workers publish via ZMQ PUB sockets; the standalone `dynamo.indexer` service aggregates events |
| **Approximate (no events)** | `--no-router-kv-events` | No events consumed; router predicts cache state from its own routing decisions with TTL-based expiration |

### Aggregated vs. Disaggregated Topology
Expand Down Expand Up @@ -93,6 +93,8 @@ Backend workers register themselves using the `register_model` API, after which
| `--router-prefill-load-model <none\|aic>` | `none` | Prompt-side load model. `aic` decays only the oldest active prefill using an AIC-predicted duration |
| `--router-queue-threshold <float>` | `4.0` | Queue threshold fraction; enables priority scheduling via `priority` |
| `--router-queue-policy <str>` | `fcfs` | Scheduling policy for the queue: `fcfs` (tail TTFT), `wspt` (avg TTFT), or `lcfs` (comparison-only reverse ordering) |
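| `--serve-indexer` | `false` | Serve this frontend/router's local KV indexer as the Dynamo-native remote indexer on the worker component. Mutually exclusive with `--use-remote-indexer`; on `dynamo.frontend` it also requires `--router-mode kv` |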
| `--use-remote-indexer` | `false` | Query the remote indexer served on the worker component instead of maintaining a local overlap indexer. Mutually exclusive with `--serve-indexer` |

For all available options: `python -m dynamo.frontend --help`

Expand Down Expand Up @@ -444,6 +446,63 @@ graph TD

For improved fault tolerance, you can launch multiple frontend + router replicas. If multiple `dynamo.frontend` processes share the same host or network namespace, give each instance a different HTTP port. In Kubernetes or on separate hosts, replicas can usually reuse the same container port. Alternatively, you can deploy the router separately as the standalone `python -m dynamo.router` service; see the [Standalone Router README](https://github.com/ai-dynamo/dynamo/blob/main/components/src/dynamo/router/README.md).

### Dynamo-Native Remote Indexer

For Dynamo-native deployments, the remote indexer is served by `dynamo.frontend` or `dynamo.router`, not by `dynamo.indexer`.

- Use `--serve-indexer` on router/frontend replicas that should expose `kv_indexer_query` from the worker component.
- Use `--use-remote-indexer` on consumer routers/frontends that should query that served endpoint instead of maintaining a local overlap indexer.
- `dynamo.indexer` remains the standalone HTTP + ZMQ microservice for non-Dynamo / direct-ZMQ deployments.

Frontend example:

```bash
# Serving frontend (exposes its local KV indexers to remote consumers)
python -m dynamo.frontend --router-mode kv --serve-indexer

# Consumer frontend
python -m dynamo.frontend --router-mode kv --use-remote-indexer
```

The served indexer endpoint is request-plane only. Each serving router/frontend keeps its normal local KV event ingestion, gap detection, and worker-query recovery path; remote consumers only issue hash-based overlap queries.

Approximate mode (`--no-router-kv-events`) is singleton-only for remote serving: only one `--serve-indexer` replica may exist for a given worker component. Event-driven mode allows multiple serving replicas behind the same worker component.
Comment thread
PeaBrane marked this conversation as resolved.

```mermaid
graph TD
subgraph "Workers"
W1["Worker 1"]
W2["Worker 2"]
end

subgraph "Event Plane"
EP["KV Events"]
end

subgraph "Serving Routers / Frontends"
S1["Router / Frontend A<br/>--serve-indexer"]
S2["Router / Frontend B<br/>--serve-indexer"]
I1["Local Indexer"]
I2["Local Indexer"]
end

subgraph "Request Plane"
RP["backend.kv_indexer_query"]
end

C["Consumer Router / Frontend<br/>--use-remote-indexer"]

W1 --> EP
W2 --> EP
EP --> S1
EP --> S2
S1 --> I1
S2 --> I2
C --> RP
RP --> S1
RP --> S2
```

### Router State Management

The KV Router tracks two types of state (see [Router Design](../../design-docs/router-design.md) for details):
Expand Down
Loading
Loading