diff --git a/.coderabbit.yaml b/.coderabbit.yaml
index 92c0f6813..b0cfd8cbd 100644
--- a/.coderabbit.yaml
+++ b/.coderabbit.yaml
@@ -28,6 +28,7 @@ reviews:
issue_assessment:
mode: warning
auto_review:
+ base_branches: ["develop", "release/.*"]
enabled: true
drafts: false
auto_incremental_review: true
diff --git a/README.md b/README.md
index 89d39ee45..4a60d0e24 100644
--- a/README.md
+++ b/README.md
@@ -81,13 +81,13 @@ pip install nvidia-nat
NeMo Agent Toolkit has many optional dependencies which can be installed with the core package. Optional dependencies are grouped by framework and can be installed with the core package. For example, to install the LangChain/LangGraph plugin, run the following:
```bash
-pip install nvidia-nat[langchain]
+pip install "nvidia-nat[langchain]"
```
Or for all optional dependencies:
```bash
-pip install nvidia-nat[all]
+pip install "nvidia-nat[all]"
```
The full list of optional dependencies can be found [here](./docs/source/quick-start/installing.md#framework-integrations).
diff --git a/ci/scripts/gitlab/report_test_results.py b/ci/scripts/gitlab/report_test_results.py
index cdd5a30a4..491b8a701 100755
--- a/ci/scripts/gitlab/report_test_results.py
+++ b/ci/scripts/gitlab/report_test_results.py
@@ -99,6 +99,7 @@ def add_text(text: str, blocks: list[dict], plain_text: list[str]) -> None:
def build_messages(junit_data: dict[str, typing.Any], coverage_data: str) -> ReportMessages:
+ branch_name = os.environ.get("CI_COMMIT_BRANCH", "unknown")
num_errors = junit_data['num_errors']
num_failures = junit_data['num_failures']
@@ -107,7 +108,7 @@ def build_messages(junit_data: dict[str, typing.Any], coverage_data: str) -> Rep
plain_text = []
blocks = []
- summary_line = f"Nightly CI/CD Test Results for {date.today()}"
+ summary_line = f"Nightly CI/CD Test Results for `{branch_name}` - {date.today()}"
plain_text.append(summary_line + "\n")
num_errors_and_failures = num_errors + num_failures
diff --git a/ci/vale/styles/config/vocabularies/nat/accept.txt b/ci/vale/styles/config/vocabularies/nat/accept.txt
index 89819881b..3df7f79e8 100644
--- a/ci/vale/styles/config/vocabularies/nat/accept.txt
+++ b/ci/vale/styles/config/vocabularies/nat/accept.txt
@@ -102,6 +102,7 @@ onboarding
[Oo]verfitting
pandas
[Pp]arallelization
+Pareto
[Pp]arsable
Patronus
PCIe
diff --git a/docs/source/extend/telemetry-exporters.md b/docs/source/extend/telemetry-exporters.md
index b99d2c6d2..f9ba45598 100644
--- a/docs/source/extend/telemetry-exporters.md
+++ b/docs/source/extend/telemetry-exporters.md
@@ -266,14 +266,14 @@ Before creating a custom exporter, check if your observability service is alread
| Service | Type | Installation | Configuration |
|---------|------|-------------|---------------|
| **File** | `file` | `pip install nvidia-nat` | local file or directory |
-| **Langfuse** | `langfuse` | `pip install nvidia-nat[opentelemetry]` | endpoint + API keys |
-| **LangSmith** | `langsmith` | `pip install nvidia-nat[opentelemetry]` | endpoint + API key |
-| **OpenTelemetry Collector** | `otelcollector` | `pip install nvidia-nat[opentelemetry]` | endpoint + headers |
-| **Patronus** | `patronus` | `pip install nvidia-nat[opentelemetry]` | endpoint + API key |
-| **Galileo** | `galileo` | `pip install nvidia-nat[opentelemetry]` | endpoint + API key |
-| **Phoenix** | `phoenix` | `pip install nvidia-nat[phoenix]` | endpoint |
-| **RagaAI/Catalyst** | `catalyst` | `pip install nvidia-nat[ragaai]` | API key + project |
-| **Weave** | `weave` | `pip install nvidia-nat[weave]` | project name |
+| **Langfuse** | `langfuse` | `pip install "nvidia-nat[opentelemetry]"` | endpoint + API keys |
+| **LangSmith** | `langsmith` | `pip install "nvidia-nat[opentelemetry]"` | endpoint + API key |
+| **OpenTelemetry Collector** | `otelcollector` | `pip install "nvidia-nat[opentelemetry]"` | endpoint + headers |
+| **Patronus** | `patronus` | `pip install "nvidia-nat[opentelemetry]"` | endpoint + API key |
+| **Galileo** | `galileo` | `pip install "nvidia-nat[opentelemetry]"` | endpoint + API key |
+| **Phoenix** | `phoenix` | `pip install "nvidia-nat[phoenix]"` | endpoint |
+| **RagaAI/Catalyst** | `catalyst` | `pip install "nvidia-nat[ragaai]"` | API key + project |
+| **Weave** | `weave` | `pip install "nvidia-nat[weave]"` | project name |
### Simple Configuration Example
@@ -412,7 +412,7 @@ class CustomSpanExporter(SpanExporter[Span, dict]):
> **Note**: OpenTelemetry exporters require the `nvidia-nat-opentelemetry` subpackage. Install it with:
> ```bash
-> pip install nvidia-nat[opentelemetry]
+> pip install "nvidia-nat[opentelemetry]"
> ```
For most OTLP-compatible services, use the pre-built `OTLPSpanAdapterExporter`:
diff --git a/docs/source/quick-start/installing.md b/docs/source/quick-start/installing.md
index db7272282..cec51c328 100644
--- a/docs/source/quick-start/installing.md
+++ b/docs/source/quick-start/installing.md
@@ -92,13 +92,13 @@ pip install nvidia-nat
NeMo Agent toolkit has many optional dependencies which can be installed with the core package. Optional dependencies are grouped by framework and can be installed with the core package. For example, to install the LangChain/LangGraph plugin, run the following:
```bash
-pip install nvidia-nat[langchain]
+pip install "nvidia-nat[langchain]"
```
Or for all optional dependencies:
```bash
-pip install nvidia-nat[all]
+pip install "nvidia-nat[all]"
```
The full list of optional dependencies can be found [here](../quick-start/installing.md#framework-integrations).
diff --git a/docs/source/reference/api-server-endpoints.md b/docs/source/reference/api-server-endpoints.md
index 5cb7ea108..5084ecfb0 100644
--- a/docs/source/reference/api-server-endpoints.md
+++ b/docs/source/reference/api-server-endpoints.md
@@ -61,7 +61,7 @@ result back to the client. The transaction schema is defined by the workflow.
## Asynchronous Generate
The asynchronous generate endpoint allows clients to submit a workflow to run in the background and return a response immediately with a unique identifier for the workflow. This can be used to query the status and results of the workflow at a later time. This is useful for long-running workflows, which would otherwise cause the client to time out.
-This endpoint is only available when the `async_endpoints` optional dependency extra is installed. For users installing from source, this can be done by running `uv pip install -e '.[async_endpoints]'` from the root directory of the NeMo Agent toolkit library. Similarly, for users installing from PyPI, this can be done by running `pip install 'nvidia-nat[async_endpoints]'`.
+This endpoint is only available when the `async_endpoints` optional dependency extra is installed. For users installing from source, this can be done by running `uv pip install -e '.[async_endpoints]'` from the root directory of the NeMo Agent toolkit library. Similarly, for users installing from PyPI, this can be done by running `pip install "nvidia-nat[async_endpoints]"`.
Asynchronous jobs are managed using [Dask](https://docs.dask.org/en/stable/). By default, a local Dask cluster is created at start time, however you can also configure the server to connect to an existing Dask scheduler by setting the `scheduler_address` configuration parameter. The Dask scheduler is used to manage the execution of asynchronous jobs, and can be configured to run on a single machine or across a cluster of machines. Job history and metadata is stored in a SQL database using [SQLAlchemy](https://www.sqlalchemy.org/). By default, a temporary SQLite database is created at start time, however you can also configure the server to use a persistent database by setting the `db_url` configuration parameter. Refer to the [SQLAlchemy documentation](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls) for the format of the `db_url` parameter. Any database supported by [SQLAlchemy's Asynchronous I/O extension](https://docs.sqlalchemy.org/en/20/orm/extensions/asyncio.html) can be used. Refer to [SQLAlchemy's Dialects](https://docs.sqlalchemy.org/en/20/dialects/index.html) for a complete list (many but not all of these support Asynchronous I/O).
diff --git a/docs/source/reference/evaluate-api.md b/docs/source/reference/evaluate-api.md
index 22852d298..2b419401a 100644
--- a/docs/source/reference/evaluate-api.md
+++ b/docs/source/reference/evaluate-api.md
@@ -20,7 +20,7 @@ limitations under the License.
It is recommended that the [Evaluating NeMo Agent toolkit Workflows](./evaluate.md) guide be read before proceeding with this detailed documentation.
:::
-The evaluation endpoint can be used to start evaluation jobs on a remote NeMo Agent toolkit server. This endpoint is only available when the `async_endpoints` optional dependency extra is installed. For users installing from source, this can be done by running `uv pip install -e '.[async_endpoints]'` from the root directory of the NeMo Agent toolkit library. Similarly, for users installing from PyPI, this can be done by running `pip install 'nvidia-nat[async_endpoints]'`.
+The evaluation endpoint can be used to start evaluation jobs on a remote NeMo Agent toolkit server. This endpoint is only available when the `async_endpoints` optional dependency extra is installed. For users installing from source, this can be done by running `uv pip install -e '.[async_endpoints]'` from the root directory of the NeMo Agent toolkit library. Similarly, for users installing from PyPI, this can be done by running `pip install "nvidia-nat[async_endpoints]"`.
## Evaluation Endpoint Overview
```{mermaid}
diff --git a/docs/source/workflows/evaluate.md b/docs/source/workflows/evaluate.md
index d5d24ddc2..ca989f9bd 100644
--- a/docs/source/workflows/evaluate.md
+++ b/docs/source/workflows/evaluate.md
@@ -34,7 +34,7 @@ uv pip install -e '.[profiling]'
If you are installing from a package, you can install the sub-package by running the following command:
```bash
-uv pip install nvidia-nat[profiling]
+uv pip install "nvidia-nat[profiling]"
```
## Evaluating a Workflow
diff --git a/docs/source/workflows/llms/index.md b/docs/source/workflows/llms/index.md
index f232867e5..280afb774 100644
--- a/docs/source/workflows/llms/index.md
+++ b/docs/source/workflows/llms/index.md
@@ -26,6 +26,7 @@ NVIDIA NeMo Agent toolkit supports the following LLM providers:
| [OpenAI](https://openai.com) | `openai` | OpenAI API |
| [AWS Bedrock](https://aws.amazon.com/bedrock/) | `aws_bedrock` | AWS Bedrock API |
| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/quickstart) | `azure_openai` | Azure OpenAI API |
+| [LiteLLM](https://github.com/BerriAI/litellm) | `litellm` | LiteLLM API |
## LLM Configuration
@@ -47,6 +48,9 @@ llms:
azure_openai_llm:
_type: azure_openai
azure_deployment: gpt-4o-mini
+ litellm_llm:
+ _type: litellm
+ model_name: gpt-4o
```
### NVIDIA NIM
@@ -128,6 +132,22 @@ The Azure OpenAI LLM provider is defined by the {py:class}`~nat.llm.azure_openai
`temperature` is model-gated and may not be supported by all models. See [Gated Fields](../../extend/gated-fields.md) for details.
:::
+### LiteLLM
+
+LiteLLM is a general-purpose LLM provider that can be used with any model provider supported by LiteLLM.
+See the [LiteLLM provider documentation](https://docs.litellm.ai/docs/providers) for more information on how to use LiteLLM. A configuration sketch follows the parameter list below.
+
+The LiteLLM LLM provider is defined by the {py:class}`~nat.llm.litellm_llm.LiteLlmModelConfig` class.
+
+* `model_name` - The name of the model to use (dependent on the model provider)
+* `api_key` - The API key to use for the model (dependent on the model provider)
+* `base_url` - The base URL to use for the model
+* `seed` - The seed to use for the model
+* `temperature` - The temperature to use for the model
+* `top_p` - The top-p value to use for the model
+* `max_retries` - The maximum number of retries for the request
+
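+A minimal configuration sketch using these fields is shown below; the values are illustrative assumptions rather than defaults, and typically only `model_name` plus whatever your model provider requires needs to be set.
+
+```yaml
+llms:
+  litellm_llm:
+    _type: litellm
+    model_name: gpt-4o                    # any LiteLLM-supported provider/model name
+    api_key: ${LITELLM_API_KEY}           # illustrative environment variable
+    base_url: https://api.example.com/v1  # illustrative; set only for custom endpoints
+    seed: 42
+    temperature: 0.0
+    top_p: 1.0
+    max_retries: 3
+```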
+
## Testing Provider
### `nat_test_llm`
`nat_test_llm` is a development and testing provider intended for examples and CI. It is not intended for production use.
diff --git a/docs/source/workflows/mcp/index.md b/docs/source/workflows/mcp/index.md
index be1cb3afc..dcc759dd8 100644
--- a/docs/source/workflows/mcp/index.md
+++ b/docs/source/workflows/mcp/index.md
@@ -21,7 +21,7 @@ NeMo Agent toolkit [Model Context Protocol (MCP)](https://modelcontextprotocol.i
* An [MCP client](./mcp-client.md) to connect to and use tools served by remote MCP servers.
* An [MCP server](./mcp-server.md) to publish tools using MCP to be used by any MCP client.
-**Note:** MCP client functionality requires the `nvidia-nat-mcp` package. Install it with `uv pip install nvidia-nat[mcp]`.
+**Note:** MCP client functionality requires the `nvidia-nat-mcp` package. Install it with `uv pip install "nvidia-nat[mcp]"`.
```{toctree}
@@ -29,4 +29,5 @@ NeMo Agent toolkit [Model Context Protocol (MCP)](https://modelcontextprotocol.i
Connecting to Remote Tools <./mcp-client.md>
Serving NeMo Agent toolkit Functions <./mcp-server.md>
+MCP Authentication <./mcp-auth.md>
```
diff --git a/docs/source/workflows/mcp/mcp-auth.md b/docs/source/workflows/mcp/mcp-auth.md
new file mode 100644
index 000000000..021d1df3a
--- /dev/null
+++ b/docs/source/workflows/mcp/mcp-auth.md
@@ -0,0 +1,180 @@
+
+
+# MCP Authentication for the NVIDIA NeMo Agent Toolkit
+MCP provides authorization capabilities at the transport level, enabling MCP clients to make requests to restricted MCP servers on behalf of resource owners. The NVIDIA NeMo Agent toolkit provides a set of built-in authentication providers for accessing servers that require authentication. The `mcp_oauth2` provider is the default authentication provider in the NeMo Agent toolkit for MCP servers. It conforms to the [MCP OAuth2](https://modelcontextprotocol.io/specification/draft/basic/authorization) specification.
+
+## Supported Capabilities
+NeMo Agent toolkit MCP authentication provides the capabilities required to access protected MCP servers:
+- Dynamic endpoint discovery using the procedures defined in [RFC 9728](https://www.rfc-editor.org/rfc/rfc9728), [RFC 8414](https://www.rfc-editor.org/rfc/rfc8414), and [OpenID Connect](https://openid.net/specs/openid-connect-core-1_0.html)
+- Client registration using the procedures defined in [RFC 7591](https://www.rfc-editor.org/rfc/rfc7591)
+- Authentication using the procedures defined in the [OAuth2 specification](https://datatracker.ietf.org/doc/html/draft-ietf-oauth-v2-1-13)
+
+## Configuring an Auth Provider
+`mcp_oauth2` is a built-in authentication provider in the NeMo Agent toolkit that implements the MCP OAuth2 specification and is used to authenticate with MCP servers that require authorization.
+Sample configuration:
+```yaml
+authentication:
+ auth_provider_mcp:
+ _type: mcp_oauth2
+ server_url: "http://localhost:9901/mcp"
+ redirect_uri: http://localhost:8000/auth/redirect
+ default_user_id: ${NAT_USER_ID}
+ allow_default_user_id_for_tool_calls: ${ALLOW_DEFAULT_USER_ID_FOR_TOOL_CALLS:-true}
+```
+Configuration options:
+- `server_url`: The URL of the MCP server that requires authentication.
+- `redirect_uri`: The redirect URI for the OAuth2 flow.
+- `default_user_id`: The user ID for discovering and adding tools to the workflow at startup. The `default_user_id` can be any string and is used as the key to cache the user's information. It defaults to the `server_url` if not provided.
+- `allow_default_user_id_for_tool_calls`: Whether to allow the default user ID for tool calls. This is typically enabled for single-user workflows, for example, a workflow that is launched using the `nat run` CLI command. For multi-user workflows, this should be disabled to avoid accidental tool calls by unauthorized users.
+
+To view all configuration options for the `mcp_oauth2` authentication provider, run the following command:
+```bash
+nat info components -t auth_provider -q mcp_oauth2
+```
+
+### Environment Variables
+Some configuration values are commonly provided through environment variables:
+- `NAT_USER_ID`: Used as `default_user_id` to cache the authenticating user during setup and optionally for tool calls. Defaults to the `server_url` if not provided.
+- `ALLOW_DEFAULT_USER_ID_FOR_TOOL_CALLS`: Controls whether the default user can invoke tools. Defaults to `true` if not provided.
+
+Set them for your current shell:
+```bash
+export NAT_USER_ID="dev-user"
+export ALLOW_DEFAULT_USER_ID_FOR_TOOL_CALLS=true
+```
+## Referencing Auth Providers in Clients
+The authentication provider is referenced by name via the `auth_provider` parameter in the MCP client configuration.
+```yaml
+function_groups:
+ mcp_tools:
+ _type: mcp_client
+ server:
+ transport: streamable-http
+ url: "http://localhost:9901/mcp"
+ auth_provider: auth_provider_mcp
+```
+
+## Limitations & Supported Transports
+- MCP Authentication is only supported for `streamable-http` transport. It is not supported for local `stdio` transport or for `sse` transport.
+- Authentication configuration is only available with `mcp_client` style configuration, not with `mcp_tool_wrapper` style configuration.
+
+## Example Workflow
+The MCP Authentication Example Workflow, `examples/MCP/simple_auth_mcp/README.md`, provides an example of how to use the `mcp_oauth2` authentication provider to authenticate with an MCP server.
+### Example Configuration
+```yaml
+function_groups:
+ mcp_jira:
+ _type: mcp_client
+ server:
+ transport: streamable-http
+ url: ${CORPORATE_MCP_JIRA_URL}
+ auth_provider: mcp_oauth2_jira
+
+authentication:
+ mcp_oauth2_jira:
+ _type: mcp_oauth2
+ server_url: ${CORPORATE_MCP_JIRA_URL}
+ redirect_uri: http://localhost:8000/auth/redirect
+ default_user_id: ${NAT_USER_ID}
+ allow_default_user_id_for_tool_calls: ${ALLOW_DEFAULT_USER_ID_FOR_TOOL_CALLS:-true}
+```
+### Running the Workflow in Single-User Mode (CLI)
+In this mode, the `default_user_id` is used for authentication during setup and for subsequent tool calls.
+
+```{mermaid}
+flowchart LR
+    U[User<br/>default-user-id] --> H[MCP Host<br/>NAT Workflow]
+    H --> C[MCP Client<br/>default-user-id]
+    C --> S[MCP Server<br/>Protected Jira Service]
+```
+
+Set the environment variables to access the protected MCP server:
+```bash
+export CORPORATE_MCP_JIRA_URL="https://your-jira-server.com/mcp"
+export NAT_USER_ID="dev-user"
+export ALLOW_DEFAULT_USER_ID_FOR_TOOL_CALLS=true
+```
+Then run the workflow:
+```bash
+nat run --config_file examples/MCP/simple_auth_mcp/configs/config-mcp-auth-jira.yml --input "What is Jira ticket AIQ-1935 about"
+```
+
+### Running the Workflow in Multi-User Mode (FastAPI)
+In this mode, the workflow is served via a FastAPI frontend. Multiple users can access the workflow concurrently using a UI with `WebSocket` mode enabled.
+
+```{mermaid}
+flowchart LR
+    U0[User<br/>default-user-id] --> H2[MCP Host<br/>NAT Workflow]
+    U1[User<br/>UI-User-1] --> H2
+    U2[User<br/>UI-User-2] --> H2
+
+    H2 --> C0[MCP Client<br/>default-user-id]
+    H2 --> C1[MCP Client<br/>UI-User-1]
+    H2 --> C2[MCP Client<br/>UI-User-2]
+
+ C0 --> S2[MCP Server]
+ C1 --> S2
+ C2 --> S2
+```
+
+1. Set the environment variables to access the protected MCP server:
+```bash
+export CORPORATE_MCP_JIRA_URL="https://your-jira-server.com/mcp"
+export NAT_USER_ID="dev-user"
+export ALLOW_DEFAULT_USER_ID_FOR_TOOL_CALLS=false
+```
+2. **Start the workflow**:
+```bash
+nat serve --config_file examples/MCP/simple_auth_mcp/configs/config-mcp-auth-jira.yml
+```
+At this point, a consent window is displayed to the user. The user must authorize the workflow to access the MCP server. This user's information is cached as the default user ID. The `default_user_id` credentials are only used for the initial setup and for populating the tools in the workflow or agent prompt at startup.
+
+Subsequent tool calls can be disabled for the default user ID by setting `allow_default_user_id_for_tool_calls` to `false` in the authentication configuration. This is recommended for multi-user workflows to avoid accidental tool calls by unauthorized users.
+
+3. **Start the UI**:
+- Start the UI by following the instructions in the [User Interface](../../quick-start/launching-ui.md) documentation.
+- Connect to the UI at `http://localhost:3000`
+- Ensure that `WebSocket` mode is enabled by navigating to the top-right corner and selecting the `WebSocket` option in the arrow pop-out.
+
+4. **Send the input to the workflow via the UI**:
+```text
+What is ticket AIQ-1935 about
+```
+At this point, a consent window is displayed again. The `UI` user must authorize the workflow to access the MCP server and call the tool. This user's information is cached separately using the `WebSocket` session cookie as the user ID.
+
+## Displaying Protected MCP Tools via CLI
+The MCP client CLI can be used to display and call MCP tools on a remote MCP server. To use a protected MCP server, you need to provide the `--auth` flag:
+```bash
+nat mcp client tool list --url http://example.com/mcp --auth
+```
+This will use the `mcp_oauth2` authentication provider to authenticate the user. For more information, see the [MCP Client](./mcp-client.md) documentation.
+
+## Security Considerations
+- The `default_user_id` is used to cache the authenticating user during setup and optionally for tool calls. It is recommended to set `allow_default_user_id_for_tool_calls` to `false` in the authentication configuration for multi-user workflows to avoid accidental tool calls by unauthorized users.
+- Use HTTPS redirect URIs in production environments, as shown in the sketch below.
+- Scope OAuth2 tokens to the minimum required permissions.
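+
+The following sketch shows an `authentication` block that applies these recommendations; the URLs are placeholders and the provider name is illustrative.
+
+```yaml
+authentication:
+  auth_provider_mcp:
+    _type: mcp_oauth2
+    server_url: https://your-mcp-server.example.com/mcp            # placeholder URL
+    redirect_uri: https://your-nat-host.example.com/auth/redirect  # HTTPS in production
+    default_user_id: ${NAT_USER_ID}
+    allow_default_user_id_for_tool_calls: false                    # recommended for multi-user workflows
+```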
+
+## Troubleshooting
+1. **Setup fails** - This can happen if:
+- The user identified by `default_user_id` did not complete the authentication flow through the pop-up UI, or
+- The user did not authorize the workflow to access the MCP server
+
+2. **Tool calls fail** - This can happen if:
+- The workflow was not accessed in `WebSocket` mode, or
+- The user did not complete the authentication flow through the `WebSocket` UI, or
+- The user is not authorized to call the tool
diff --git a/docs/source/workflows/mcp/mcp-client.md b/docs/source/workflows/mcp/mcp-client.md
index b46ca0d72..ea2d06ce1 100644
--- a/docs/source/workflows/mcp/mcp-client.md
+++ b/docs/source/workflows/mcp/mcp-client.md
@@ -28,8 +28,10 @@ This guide will cover how to use a NeMo Agent toolkit workflow as a MCP host wit
MCP client functionality requires the `nvidia-nat-mcp` package. Install it with:
```bash
-uv pip install nvidia-nat[mcp]
+uv pip install "nvidia-nat[mcp]"
```
+## Accessing Protected MCP Servers
+NeMo Agent toolkit can access protected MCP servers via the MCP client auth provider. For more information, see the [MCP Authentication](./mcp-auth.md) documentation.
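+
+A minimal sketch of an MCP client section that references an auth provider is shown below; the provider name `auth_provider_mcp` and the URL are illustrative, and the matching `authentication` block is described in the MCP Authentication documentation.
+
+```yaml
+function_groups:
+  mcp_tools:
+    _type: mcp_client
+    server:
+      transport: streamable-http
+      url: "http://localhost:9901/mcp"   # illustrative server URL
+      auth_provider: auth_provider_mcp   # name of an entry in the `authentication` section
+```
+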
## MCP Client Configuration
NeMo Agent toolkit enables workflows to use MCP tools as functions. The library handles the MCP server connection, tool discovery, and function registration. This allows the workflow to use MCP tools as regular functions.
@@ -297,7 +299,7 @@ To use a protected MCP server, you need to provide the `--auth` flag:
```bash
nat mcp client tool list --url http://example.com/mcp --auth
```
-This will use the `mcp_oauth2` authentication provider to authenticate the user.
+This will use the `mcp_oauth2` authentication provider to authenticate the user. For more information, see the [MCP Authentication](./mcp-auth.md) documentation.
Sample output:
```text
diff --git a/docs/source/workflows/mcp/mcp-server.md b/docs/source/workflows/mcp/mcp-server.md
index 6514b06da..6f2c6a63b 100644
--- a/docs/source/workflows/mcp/mcp-server.md
+++ b/docs/source/workflows/mcp/mcp-server.md
@@ -62,7 +62,7 @@ nat mcp serve --config_file examples/getting_started/simple_calculator/configs/c
To list the tools published by the MCP server you can use the `nat mcp client tool list` command. This command acts as an MCP client and connects to the MCP server running on the specified URL (defaults to `http://localhost:9901/mcp` for streamable-http, with backwards compatibility for `http://localhost:9901/sse`).
-**Note:** The `nat mcp client` commands require the `nvidia-nat-mcp` package. If you encounter an error about missing MCP client functionality, install it with `uv pip install nvidia-nat[mcp]`.
+**Note:** The `nat mcp client` commands require the `nvidia-nat-mcp` package. If you encounter an error about missing MCP client functionality, install it with `uv pip install "nvidia-nat[mcp]"`.
```bash
nat mcp client tool list
@@ -188,3 +188,8 @@ Sample output:
Server at http://localhost:9901/mcp is healthy (response time: 4.35ms)
```
This is useful for health checks and monitoring.
+
+## Limitations
+- The `nat mcp serve` command currently starts an MCP server without built-in authentication. This is a temporary limitation; server-side authentication is planned for a future release.
+- NeMo Agent toolkit workflows can still connect to protected third-party MCP servers via the MCP client auth provider.
+- Recommendation: run `nat mcp serve` behind a trusted network or an authenticating reverse proxy (HTTPS with OAuth2, JWT, or mTLS), and avoid exposing it directly to the public Internet.
diff --git a/docs/source/workflows/profiler.md b/docs/source/workflows/profiler.md
index 810fddd39..c03344df3 100644
--- a/docs/source/workflows/profiler.md
+++ b/docs/source/workflows/profiler.md
@@ -41,7 +41,7 @@ uv pip install -e ".[profiling]"
If you are installing from a package, you need to install the `nvidia-nat[profiling]` package by running the following command:
```bash
-uv pip install nvidia-nat[profiling]
+uv pip install "nvidia-nat[profiling]"
```
## Current Profiler Architecture
diff --git a/examples/MCP/simple_auth_mcp/README.md b/examples/MCP/simple_auth_mcp/README.md
index 2ef8dfb69..1baf223d6 100644
--- a/examples/MCP/simple_auth_mcp/README.md
+++ b/examples/MCP/simple_auth_mcp/README.md
@@ -19,12 +19,14 @@ limitations under the License.
This example demonstrates how to use the NVIDIA NeMo Agent toolkit with MCP servers that require authentication. You'll authenticate with protected MCP services and access secured tools through OAuth2 flows.
+It is recommended to read the [MCP Authentication](../../../docs/source/workflows/mcp/mcp-auth.md) documentation first.
+
## Prerequisites
1. **Agent toolkit**: Ensure you have the Agent toolkit installed. If you have not already done so, follow the instructions in the [Install Guide](../../../docs/source/quick-start/installing.md#install-from-source) to create the development environment and install NeMo Agent Toolkit.
2. **MCP Server**: Access to an MCP server that requires authentication (e.g., corporate Jira system)
-**Note**: If you installed NeMo Agent toolkit from source, MCP client functionality is already included. If you installed from PyPI, you may need to install the MCP client package separately with `uv pip install nvidia-nat[mcp]`.
+**Note**: If you installed NeMo Agent toolkit from source, MCP client functionality is already included. If you installed from PyPI, you may need to install the MCP client package separately with `uv pip install "nvidia-nat[mcp]"`.
## Install this Workflow
diff --git a/examples/RAG/simple_rag/configs/milvus_memory_rag_tools_config.yml b/examples/RAG/simple_rag/configs/milvus_memory_rag_tools_config.yml
index c4d102b8d..d61c09dc1 100644
--- a/examples/RAG/simple_rag/configs/milvus_memory_rag_tools_config.yml
+++ b/examples/RAG/simple_rag/configs/milvus_memory_rag_tools_config.yml
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
memory:
saas_memory:
_type: mem0_memory
@@ -56,7 +55,7 @@ functions:
The question should be about user preferences which will help you format your response.
For example: "How does the user like responses formatted?"
- # To use these tools you will need to install the nvidia-nat[langchain] package
+ # To use these tools you will need to install the "nvidia-nat[langchain]" package
web_search_tool:
_type: tavily_internet_search
max_results: 5
@@ -85,11 +84,11 @@ embedders:
workflow:
_type: react_agent
tool_names:
- - cuda_retriever_tool
- - mcp_retriever_tool
- - add_memory
- - get_memory
- - web_search_tool
- - code_generation_tool
+ - cuda_retriever_tool
+ - mcp_retriever_tool
+ - add_memory
+ - get_memory
+ - web_search_tool
+ - code_generation_tool
verbose: true
llm_name: nim_llm
diff --git a/examples/RAG/simple_rag/configs/milvus_rag_tools_config.yml b/examples/RAG/simple_rag/configs/milvus_rag_tools_config.yml
index b823d9cf5..dbb446797 100644
--- a/examples/RAG/simple_rag/configs/milvus_rag_tools_config.yml
+++ b/examples/RAG/simple_rag/configs/milvus_rag_tools_config.yml
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
retrievers:
cuda_retriever:
_type: milvus_retriever
@@ -38,7 +37,7 @@ functions:
retriever: mcp_retriever
topic: Retrieve information about Model Context Protocol (MCP)
- # To use these tools you will need to install the nvidia-nat[langchain] package
+ # To use these tools you will need to install the "nvidia-nat[langchain]" package
web_search_tool:
_type: tavily_internet_search
max_results: 5
@@ -67,10 +66,10 @@ embedders:
workflow:
_type: react_agent
tool_names:
- - cuda_retriever_tool
- - mcp_retriever_tool
- - web_search_tool
- - code_generation_tool
+ - cuda_retriever_tool
+ - mcp_retriever_tool
+ - web_search_tool
+ - code_generation_tool
verbose: true
llm_name: nim_llm
additional_instructions: "If a tool call results in code or other artifacts being returned, you MUST include that in your thoughts and response."
diff --git a/examples/evaluation_and_profiling/email_phishing_analyzer/README.md b/examples/evaluation_and_profiling/email_phishing_analyzer/README.md
index 65f3e48aa..02620d10c 100644
--- a/examples/evaluation_and_profiling/email_phishing_analyzer/README.md
+++ b/examples/evaluation_and_profiling/email_phishing_analyzer/README.md
@@ -206,10 +206,14 @@ Ensure `NVIDIA_API_KEY` is set in your environment.
### Outputs
Results are written to the path specified by `optimizer.output_path`. Expect artifacts such as:
-- `best_params.json`: Highest-scoring parameter set.
-- `optimization_results.json`: Per-trial metrics and parameters.
-- `pareto_front.png`, `optimization_history.png`, `param_importances.png`: Visual summaries.
-- For prompt optimization (when enabled): `optimized_prompts.json` and per-generation prompt history.
+- `optimized_config.yml`: Tuned configuration derived from the selected trial.
+- A configuration file for each numeric trial, for example, `config_numeric_trial_0.yml` contains the configuration
+  for the first numeric trial. This is helpful for selecting a specific trial whose metrics you prefer over the
+  optimizer-selected trial.
+- `trials_dataframe_params.csv`: Full Optuna trials `dataframe` (`values`, `params`, `timings`, `rep_scores`).
+- `plots`: This directory will contain Pareto visualizations of the optimization results.
+- For prompt optimization (when enabled): `optimized_prompts.json` and per-generation prompt history. Per-generation
+  prompt history files are named `optimized_prompts_gen{N}.json`, where `{N}` is the generation number starting from 1.
---
diff --git a/examples/frameworks/adk_demo/src/nat_adk_demo/nat_time_mcp_tool.py b/examples/frameworks/adk_demo/src/nat_adk_demo/nat_time_mcp_tool.py
index 5090d02bd..0fe0bbd59 100644
--- a/examples/frameworks/adk_demo/src/nat_adk_demo/nat_time_mcp_tool.py
+++ b/examples/frameworks/adk_demo/src/nat_adk_demo/nat_time_mcp_tool.py
@@ -19,6 +19,7 @@
from zoneinfo import ZoneInfo
from nat.builder.builder import Builder
+from nat.builder.framework_enum import LLMFrameworkEnum
from nat.builder.function_info import FunctionInfo
from nat.cli.register_workflow import register_function
from nat.data_models.function import FunctionBaseConfig
@@ -30,7 +31,7 @@ class TimeMCPToolConfig(FunctionBaseConfig, name="get_city_time_tool"):
"""Configuration for the get_city_time tool."""
-@register_function(config_type=TimeMCPToolConfig)
+@register_function(config_type=TimeMCPToolConfig, framework_wrappers=[LLMFrameworkEnum.ADK])
async def get_city_time(_config: TimeMCPToolConfig, _builder: Builder) -> AsyncIterator[FunctionInfo]:
"""
Register a get_city_time(city: str) -> str tool for ADK.
diff --git a/examples/frameworks/adk_demo/src/nat_adk_demo/weather_update_tool.py b/examples/frameworks/adk_demo/src/nat_adk_demo/weather_update_tool.py
index 9686d412d..48d26eb76 100644
--- a/examples/frameworks/adk_demo/src/nat_adk_demo/weather_update_tool.py
+++ b/examples/frameworks/adk_demo/src/nat_adk_demo/weather_update_tool.py
@@ -17,6 +17,7 @@
from collections.abc import AsyncIterator
from nat.builder.builder import Builder
+from nat.builder.framework_enum import LLMFrameworkEnum
from nat.builder.function_info import FunctionInfo
from nat.cli.register_workflow import register_function
from nat.data_models.function import FunctionBaseConfig
@@ -26,7 +27,7 @@ class WeatherToolConfig(FunctionBaseConfig, name="weather_update"):
pass
-@register_function(config_type=WeatherToolConfig)
+@register_function(config_type=WeatherToolConfig, framework_wrappers=[LLMFrameworkEnum.ADK])
async def weather_update(_config: WeatherToolConfig, _builder: Builder) -> AsyncIterator[FunctionInfo]:
async def _weather_update(city: str) -> str:
diff --git a/examples/notebooks/1_getting_started.ipynb b/examples/notebooks/1_getting_started.ipynb
index 207040c66..65351237b 100644
--- a/examples/notebooks/1_getting_started.ipynb
+++ b/examples/notebooks/1_getting_started.ipynb
@@ -18,7 +18,7 @@
"Ensure you meet the following prerequisites:\n",
"1. Git\n",
"2. [uv](https://docs.astral.sh/uv/getting-started/installation/)\n",
- "3. NeMo-Agent-Toolkit installed from source following [these instructions](https://github.com/cdgamarose-nv/NeMo-Agent-Toolkit/tree/develop?tab=readme-ov-file#install-from-source)\n"
+ "3. NeMo-Agent-Toolkit installed from source following [these instructions](https://github.com/NVIDIA/NeMo-Agent-Toolkit/blob/main/docs/source/quick-start/installing.md#installation-from-source)\n"
]
},
{
diff --git a/packages/nvidia_nat_agno/src/nat/plugins/agno/llm.py b/packages/nvidia_nat_agno/src/nat/plugins/agno/llm.py
index 2b1e8b49b..18a6d3726 100644
--- a/packages/nvidia_nat_agno/src/nat/plugins/agno/llm.py
+++ b/packages/nvidia_nat_agno/src/nat/plugins/agno/llm.py
@@ -21,6 +21,7 @@
from nat.data_models.llm import LLMBaseConfig
from nat.data_models.retry_mixin import RetryMixin
from nat.data_models.thinking_mixin import ThinkingMixin
+from nat.llm.litellm_llm import LiteLlmModelConfig
from nat.llm.nim_llm import NIMModelConfig
from nat.llm.openai_llm import OpenAIModelConfig
from nat.llm.utils.thinking import BaseThinkingInjector
@@ -99,3 +100,20 @@ async def openai_agno(llm_config: OpenAIModelConfig, _builder: Builder):
client = OpenAIChat(**config_obj, id=llm_config.model_name)
yield _patch_llm_based_on_config(client, llm_config)
+
+
+@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.AGNO)
+async def litellm_agno(llm_config: LiteLlmModelConfig, _builder: Builder):
+
+ from agno.models.litellm.chat import LiteLLM
+
+ client = LiteLLM(
+ **llm_config.model_dump(
+ exclude={"type", "thinking", "model_name"},
+ by_alias=True,
+ exclude_none=True,
+ ),
+ id=llm_config.model_name,
+ )
+
+ yield _patch_llm_based_on_config(client, llm_config)
diff --git a/packages/nvidia_nat_crewai/src/nat/plugins/crewai/llm.py b/packages/nvidia_nat_crewai/src/nat/plugins/crewai/llm.py
index 078107acc..e56b80329 100644
--- a/packages/nvidia_nat_crewai/src/nat/plugins/crewai/llm.py
+++ b/packages/nvidia_nat_crewai/src/nat/plugins/crewai/llm.py
@@ -23,6 +23,7 @@
from nat.data_models.retry_mixin import RetryMixin
from nat.data_models.thinking_mixin import ThinkingMixin
from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
+from nat.llm.litellm_llm import LiteLlmModelConfig
from nat.llm.nim_llm import NIMModelConfig
from nat.llm.openai_llm import OpenAIModelConfig
from nat.llm.utils.thinking import BaseThinkingInjector
@@ -126,3 +127,13 @@ async def openai_crewai(llm_config: OpenAIModelConfig, _builder: Builder):
client = LLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
yield _patch_llm_based_on_config(client, llm_config)
+
+
+@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.CREWAI)
+async def litellm_crewai(llm_config: LiteLlmModelConfig, _builder: Builder):
+
+ from crewai import LLM
+
+ client = LLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
+
+ yield _patch_llm_based_on_config(client, llm_config)
diff --git a/packages/nvidia_nat_langchain/pyproject.toml b/packages/nvidia_nat_langchain/pyproject.toml
index 54f597345..47c3d29d7 100644
--- a/packages/nvidia_nat_langchain/pyproject.toml
+++ b/packages/nvidia_nat_langchain/pyproject.toml
@@ -23,6 +23,7 @@ dependencies = [
"nvidia-nat~=1.3",
"langchain-aws~=0.2.31",
"langchain-core~=0.3.75",
+ "langchain-litellm~=0.2.3",
"langchain-milvus~=0.2.1",
"langchain-nvidia-ai-endpoints~=0.3.17",
"langchain-openai~=0.3.32",
diff --git a/packages/nvidia_nat_langchain/src/nat/plugins/langchain/llm.py b/packages/nvidia_nat_langchain/src/nat/plugins/langchain/llm.py
index ea7a12b15..527f28beb 100644
--- a/packages/nvidia_nat_langchain/src/nat/plugins/langchain/llm.py
+++ b/packages/nvidia_nat_langchain/src/nat/plugins/langchain/llm.py
@@ -24,6 +24,7 @@
from nat.data_models.thinking_mixin import ThinkingMixin
from nat.llm.aws_bedrock_llm import AWSBedrockModelConfig
from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
+from nat.llm.litellm_llm import LiteLlmModelConfig
from nat.llm.nim_llm import NIMModelConfig
from nat.llm.openai_llm import OpenAIModelConfig
from nat.llm.utils.thinking import BaseThinkingInjector
@@ -154,3 +155,13 @@ async def openai_langchain(llm_config: OpenAIModelConfig, _builder: Builder):
))
yield _patch_llm_based_on_config(client, llm_config)
+
+
+@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+async def litellm_langchain(llm_config: LiteLlmModelConfig, _builder: Builder):
+
+ from langchain_litellm import ChatLiteLLM
+
+ client = ChatLiteLLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
+
+ yield _patch_llm_based_on_config(client, llm_config)
diff --git a/packages/nvidia_nat_llama_index/pyproject.toml b/packages/nvidia_nat_llama_index/pyproject.toml
index 3e8e66eb5..5f51b4025 100644
--- a/packages/nvidia_nat_llama_index/pyproject.toml
+++ b/packages/nvidia_nat_llama_index/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
"llama-index-embeddings-openai~=0.3.1",
"llama-index-llms-azure-openai~=0.3.2",
"llama-index-llms-bedrock~=0.3.8",
+ "llama-index-llms-litellm~=0.5.1",
"llama-index-llms-nvidia~=0.3.1",
"llama-index-llms-openai~=0.3.42",
"llama-index-readers-file~=0.4.4",
diff --git a/packages/nvidia_nat_llama_index/src/nat/plugins/llama_index/llm.py b/packages/nvidia_nat_llama_index/src/nat/plugins/llama_index/llm.py
index 42d788e55..c1e9b4638 100644
--- a/packages/nvidia_nat_llama_index/src/nat/plugins/llama_index/llm.py
+++ b/packages/nvidia_nat_llama_index/src/nat/plugins/llama_index/llm.py
@@ -24,6 +24,7 @@
from nat.data_models.thinking_mixin import ThinkingMixin
from nat.llm.aws_bedrock_llm import AWSBedrockModelConfig
from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
+from nat.llm.litellm_llm import LiteLlmModelConfig
from nat.llm.nim_llm import NIMModelConfig
from nat.llm.openai_llm import OpenAIModelConfig
from nat.llm.utils.thinking import BaseThinkingInjector
@@ -107,3 +108,13 @@ async def openai_llama_index(llm_config: OpenAIModelConfig, _builder: Builder):
llm = OpenAI(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
yield _patch_llm_based_on_config(llm, llm_config)
+
+
+@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def litellm_llama_index(llm_config: LiteLlmModelConfig, _builder: Builder):
+
+ from llama_index.llms.litellm import LiteLLM
+
+ llm = LiteLLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
+
+ yield _patch_llm_based_on_config(llm, llm_config)
diff --git a/pyproject.toml b/pyproject.toml
index 88c363db1..d1e025b14 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,7 +66,7 @@ maintainers = [{ name = "NVIDIA Corporation" }]
[project.optional-dependencies]
-# Optional dependencies are things that users would want to install with NAT. i.e. `uv pip install nvidia-nat[langchain]`
+# Optional dependencies are things that users would want to install with NAT. i.e. `uv pip install "nvidia-nat[langchain]"`
# Keep sorted!!!
all = ["nvidia-nat-all"] # meta-package
adk = ["nvidia-nat-adk"]
diff --git a/src/nat/agent/prompt_optimizer/register.py b/src/nat/agent/prompt_optimizer/register.py
index 83a7e2458..ed3d1533e 100644
--- a/src/nat/agent/prompt_optimizer/register.py
+++ b/src/nat/agent/prompt_optimizer/register.py
@@ -51,7 +51,7 @@ async def prompt_optimizer_function(config: PromptOptimizerConfig, builder: Buil
from .prompt import mutator_prompt
except ImportError as exc:
raise ImportError("langchain-core is not installed. Please install it to use MultiLLMPlanner.\n"
- "This error can be resolve by installing nvidia-nat[langchain]") from exc
+                               "This error can be resolved by installing \"nvidia-nat[langchain]\".") from exc
llm = await builder.get_llm(config.optimizer_llm, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
@@ -111,7 +111,7 @@ async def prompt_recombiner_function(config: PromptRecombinerConfig, builder: Bu
from langchain_core.prompts import PromptTemplate
except ImportError as exc:
raise ImportError("langchain-core is not installed. Please install it to use MultiLLMPlanner.\n"
- "This error can be resolve by installing nvidia-nat[langchain].") from exc
+                               "This error can be resolved by installing \"nvidia-nat[langchain]\".") from exc
llm = await builder.get_llm(config.optimizer_llm, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
diff --git a/src/nat/cli/commands/mcp/mcp.py b/src/nat/cli/commands/mcp/mcp.py
index ca992e87f..50512286f 100644
--- a/src/nat/cli/commands/mcp/mcp.py
+++ b/src/nat/cli/commands/mcp/mcp.py
@@ -297,7 +297,7 @@ def to_tool_entry(full_name: str, fn_obj) -> dict[str, str | None]:
if fn is not None:
tools.append(to_tool_entry(full, fn))
else:
- for full, fn in (await fns).items():
+ for full, fn in fns.items():
tools.append(to_tool_entry(full, fn))
return tools
diff --git a/src/nat/cli/main.py b/src/nat/cli/main.py
index 009840a39..8a3b08015 100644
--- a/src/nat/cli/main.py
+++ b/src/nat/cli/main.py
@@ -30,6 +30,9 @@ def run_cli():
import os
import sys
+ # Suppress warnings from transformers
+ os.environ["TRANSFORMERS_VERBOSITY"] = "error"
+
parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
if (parent_dir not in sys.path):
diff --git a/src/nat/profiler/decorators/framework_wrapper.py b/src/nat/profiler/decorators/framework_wrapper.py
index 59f62febc..7a89ed0a4 100644
--- a/src/nat/profiler/decorators/framework_wrapper.py
+++ b/src/nat/profiler/decorators/framework_wrapper.py
@@ -123,7 +123,7 @@ async def wrapper(workflow_config, builder):
except ImportError as e:
logger.warning(
"ADK profiler not available. " +
- "Install NAT with ADK extras: pip install 'nvidia-nat[adk]'. Error: %s",
+ "Install NAT with ADK extras: pip install \"nvidia-nat[adk]\". Error: %s",
e)
else:
handler = ADKProfilerHandler()
diff --git a/src/nat/profiler/forecasting/models/linear_model.py b/src/nat/profiler/forecasting/models/linear_model.py
index be6c9d19b..6c3589bd1 100644
--- a/src/nat/profiler/forecasting/models/linear_model.py
+++ b/src/nat/profiler/forecasting/models/linear_model.py
@@ -36,7 +36,7 @@ def __init__(self):
except ImportError:
logger.error(
"scikit-learn is not installed. Please install scikit-learn to use the LinearModel "
- "profiling model or install `nvidia-nat[profiler]` to install all necessary profiling packages.")
+ "profiling model or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
raise
diff --git a/src/nat/profiler/forecasting/models/random_forest_regressor.py b/src/nat/profiler/forecasting/models/random_forest_regressor.py
index 51a3c40d1..3fa36310a 100644
--- a/src/nat/profiler/forecasting/models/random_forest_regressor.py
+++ b/src/nat/profiler/forecasting/models/random_forest_regressor.py
@@ -36,7 +36,7 @@ def __init__(self):
except ImportError:
logger.error(
"scikit-learn is not installed. Please install scikit-learn to use the RandomForest "
- "profiling model or install `nvidia-nat[profiler]` to install all necessary profiling packages.")
+ "profiling model or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
raise
diff --git a/src/nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py b/src/nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py
index 3ca6b0347..bd02b9872 100644
--- a/src/nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py
+++ b/src/nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py
@@ -304,7 +304,7 @@ def save_gantt_chart(all_nodes: list[CallNode], output_path: str) -> None:
import matplotlib.pyplot as plt
except ImportError:
logger.error("matplotlib is not installed. Please install matplotlib to use generate plots for the profiler "
- "or install `nvidia-nat[profiler]` to install all necessary profiling packages.")
+ "or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
raise
diff --git a/src/nat/profiler/inference_optimization/experimental/prefix_span_analysis.py b/src/nat/profiler/inference_optimization/experimental/prefix_span_analysis.py
index 475740500..8da76bd1e 100644
--- a/src/nat/profiler/inference_optimization/experimental/prefix_span_analysis.py
+++ b/src/nat/profiler/inference_optimization/experimental/prefix_span_analysis.py
@@ -212,7 +212,7 @@ def run_prefixspan(sequences_map: dict[int, list[PrefixCallNode]],
from prefixspan import PrefixSpan
except ImportError:
logger.error("prefixspan is not installed. Please install prefixspan to run the prefix analysis in the "
- "profiler or install `nvidia-nat[profiler]` to install all necessary profiling packages.")
+ "profiler or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
raise
diff --git a/uv.lock b/uv.lock
index 995d06e1b..71be25701 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3909,6 +3909,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/77/b5/501c0ffcb09c734457ceaa86bc7b1dd37b6a261147bd653add03b838aacb/langchain_core-0.3.76-py3-none-any.whl", hash = "sha256:46e0eb48c7ac532432d51f8ca1ece1804c82afe9ae3dcf027b867edadf82b3ec", size = 447508, upload-time = "2025-09-10T14:49:38.179Z" },
]
+[[package]]
+name = "langchain-litellm"
+version = "0.2.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "langchain-core" },
+ { name = "litellm" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/40/8f/08032033cd4bdff1d177d6a9e3a1021e47c4c63fd1d8c564af6f3c7e9f8d/langchain_litellm-0.2.3.tar.gz", hash = "sha256:0e11687373ae6a99efee5a04d3a76de4fab0e1459edc0e84adb6f60ca76ebf79", size = 10829, upload-time = "2025-09-25T11:01:41.295Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/76/26/271b1dad80b39a0e9df7ab13f63fa3fad52ce8288ddf73dec32a2212219f/langchain_litellm-0.2.3-py3-none-any.whl", hash = "sha256:422254b8742893aed6380f5ee73e6ae77869b218758edd0888d14ebd2c439352", size = 11571, upload-time = "2025-09-25T11:01:40.183Z" },
+]
+
[[package]]
name = "langchain-milvus"
version = "0.2.1"
@@ -4376,6 +4389,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ff/2f/7fc5206467151f64764bae61abd0fbbb8392fe84def15b1467f7fb174d7b/llama_index_llms_bedrock-0.3.8-py3-none-any.whl", hash = "sha256:58b804a206146bd7228590a4ee92ce13806a21040d92cb61e3046f2ee64f66cd", size = 11516, upload-time = "2025-03-26T16:15:07.722Z" },
]
+[[package]]
+name = "llama-index-llms-litellm"
+version = "0.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "litellm" },
+ { name = "llama-index-core" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/4d/13/64d3d2db8924eec085c05addaf8279f0ba95b28820ea4f035d0ba77cf711/llama_index_llms_litellm-0.5.1.tar.gz", hash = "sha256:b99460b0d1ef7cf48e02d139dc5358e6818a80b95633ba67d8438c73e180f5ef", size = 10611, upload-time = "2025-06-04T11:53:39.934Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/7c/df/87bb97f75390aff4a4a479c329d27dd27d6069451574176eaaf837f67e07/llama_index_llms_litellm-0.5.1-py3-none-any.whl", hash = "sha256:f63c8384d051a9983a86e6f00f79cd9d6a5b81a035a4cff7c1548c3ff4b4d7f0", size = 10881, upload-time = "2025-06-04T11:53:38.619Z" },
+]
+
[[package]]
name = "llama-index-llms-nvidia"
version = "0.3.3"
@@ -6362,6 +6388,7 @@ source = { editable = "packages/nvidia_nat_langchain" }
dependencies = [
{ name = "langchain-aws" },
{ name = "langchain-core" },
+ { name = "langchain-litellm" },
{ name = "langchain-milvus" },
{ name = "langchain-nvidia-ai-endpoints" },
{ name = "langchain-openai" },
@@ -6374,6 +6401,7 @@ dependencies = [
requires-dist = [
{ name = "langchain-aws", specifier = "~=0.2.31" },
{ name = "langchain-core", specifier = "~=0.3.75" },
+ { name = "langchain-litellm", specifier = "~=0.2.3" },
{ name = "langchain-milvus", specifier = "~=0.2.1" },
{ name = "langchain-nvidia-ai-endpoints", specifier = "~=0.3.17" },
{ name = "langchain-openai", specifier = "~=0.3.32" },
@@ -6393,6 +6421,7 @@ dependencies = [
{ name = "llama-index-embeddings-openai" },
{ name = "llama-index-llms-azure-openai" },
{ name = "llama-index-llms-bedrock" },
+ { name = "llama-index-llms-litellm" },
{ name = "llama-index-llms-nvidia" },
{ name = "llama-index-llms-openai" },
{ name = "llama-index-readers-file" },
@@ -6408,6 +6437,7 @@ requires-dist = [
{ name = "llama-index-embeddings-openai", specifier = "~=0.3.1" },
{ name = "llama-index-llms-azure-openai", specifier = "~=0.3.2" },
{ name = "llama-index-llms-bedrock", specifier = "~=0.3.8" },
+ { name = "llama-index-llms-litellm", specifier = "~=0.5.1" },
{ name = "llama-index-llms-nvidia", specifier = "~=0.3.1" },
{ name = "llama-index-llms-openai", specifier = "~=0.3.42" },
{ name = "llama-index-readers-file", specifier = "~=0.4.4" },