20 changes: 20 additions & 0 deletions docs/source/workflows/llms/index.md
@@ -26,6 +26,7 @@ NVIDIA NeMo Agent toolkit supports the following LLM providers:
| [OpenAI](https://openai.com) | `openai` | OpenAI API |
| [AWS Bedrock](https://aws.amazon.com/bedrock/) | `aws_bedrock` | AWS Bedrock API |
| [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/quickstart) | `azure_openai` | Azure OpenAI API |
| [LiteLLM](https://github.com/BerriAI/litellm) | `litellm` | LiteLLM API |


## LLM Configuration
@@ -47,6 +48,9 @@ llms:
azure_openai_llm:
_type: azure_openai
azure_deployment: gpt-4o-mini
litellm_llm:
_type: litellm
model_name: gpt-4o
```

### NVIDIA NIM
@@ -128,6 +132,22 @@ The Azure OpenAI LLM provider is defined by the {py:class}`~nat.llm.azure_openai_llm.AzureOpenAIModelConfig` class.
`temperature` is model-gated and may not be supported by all models. See [Gated Fields](../../extend/gated-fields.md) for details.
:::

### LiteLLM

LiteLLM is a general-purpose LLM provider that can route requests to any model provider supported by LiteLLM.
See the [LiteLLM provider documentation](https://docs.litellm.ai/docs/providers) for the full list of supported providers and how to use them.

The LiteLLM LLM provider is defined by the {py:class}`~nat.llm.litellm_llm.LiteLlmModelConfig` class, which supports the following fields (see the example configuration after the list):

* `model_name` - The name of the model to use (dependent on the model provider)
* `api_key` - The API key to use for the model (dependent on the model provider)
* `base_url` - The base URL to use for the model
* `seed` - The seed to use for the model
* `temperature` - The temperature to use for the model
* `top_p` - The top-p value to use for the model
* `max_retries` - The maximum number of retries for the request
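
As a minimal sketch, a LiteLLM entry in the `llms` section of a workflow configuration might look like the following (the model name, base URL, and sampling values are illustrative assumptions; substitute the values expected by your provider):

```yaml
llms:
  litellm_llm:
    _type: litellm
    # Illustrative values: LiteLLM infers the upstream provider from the model name.
    model_name: gpt-4o
    base_url: https://api.openai.com/v1  # optional; assumed endpoint for this example
    temperature: 0.0
    top_p: 1.0
    max_retries: 3
```

If `api_key` is omitted, LiteLLM typically falls back to the provider's standard environment variables (for example, `OPENAI_API_KEY`).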


## Testing Provider
### `nat_test_llm`
`nat_test_llm` is a development and testing provider intended for examples and CI. It is not intended for production use.
18 changes: 18 additions & 0 deletions packages/nvidia_nat_agno/src/nat/plugins/agno/llm.py
@@ -21,6 +21,7 @@
from nat.data_models.llm import LLMBaseConfig
from nat.data_models.retry_mixin import RetryMixin
from nat.data_models.thinking_mixin import ThinkingMixin
from nat.llm.litellm_llm import LiteLlmModelConfig
from nat.llm.nim_llm import NIMModelConfig
from nat.llm.openai_llm import OpenAIModelConfig
from nat.llm.utils.thinking import BaseThinkingInjector
@@ -99,3 +100,20 @@ async def openai_agno(llm_config: OpenAIModelConfig, _builder: Builder):
client = OpenAIChat(**config_obj, id=llm_config.model_name)

yield _patch_llm_based_on_config(client, llm_config)


@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.AGNO)
async def litellm_agno(llm_config: LiteLlmModelConfig, _builder: Builder):

from agno.models.litellm.chat import LiteLLM

client = LiteLLM(
**llm_config.model_dump(
exclude={"type", "thinking", "model_name"},
by_alias=True,
exclude_none=True,
),
id=llm_config.model_name,
)

yield _patch_llm_based_on_config(client, llm_config)
11 changes: 11 additions & 0 deletions packages/nvidia_nat_crewai/src/nat/plugins/crewai/llm.py
@@ -23,6 +23,7 @@
from nat.data_models.retry_mixin import RetryMixin
from nat.data_models.thinking_mixin import ThinkingMixin
from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
from nat.llm.litellm_llm import LiteLlmModelConfig
from nat.llm.nim_llm import NIMModelConfig
from nat.llm.openai_llm import OpenAIModelConfig
from nat.llm.utils.thinking import BaseThinkingInjector
@@ -126,3 +127,13 @@ async def openai_crewai(llm_config: OpenAIModelConfig, _builder: Builder):
client = LLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))

yield _patch_llm_based_on_config(client, llm_config)


@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.CREWAI)
async def litellm_crewai(llm_config: LiteLlmModelConfig, _builder: Builder):

from crewai import LLM

client = LLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))

yield _patch_llm_based_on_config(client, llm_config)
1 change: 1 addition & 0 deletions packages/nvidia_nat_langchain/pyproject.toml
@@ -23,6 +23,7 @@ dependencies = [
"nvidia-nat~=1.3",
"langchain-aws~=0.2.31",
"langchain-core~=0.3.75",
"langchain-litellm~=0.2.3",
"langchain-milvus~=0.2.1",
"langchain-nvidia-ai-endpoints~=0.3.17",
"langchain-openai~=0.3.32",
11 changes: 11 additions & 0 deletions packages/nvidia_nat_langchain/src/nat/plugins/langchain/llm.py
@@ -24,6 +24,7 @@
from nat.data_models.thinking_mixin import ThinkingMixin
from nat.llm.aws_bedrock_llm import AWSBedrockModelConfig
from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
from nat.llm.litellm_llm import LiteLlmModelConfig
from nat.llm.nim_llm import NIMModelConfig
from nat.llm.openai_llm import OpenAIModelConfig
from nat.llm.utils.thinking import BaseThinkingInjector
@@ -154,3 +155,13 @@ async def openai_langchain(llm_config: OpenAIModelConfig, _builder: Builder):
))

yield _patch_llm_based_on_config(client, llm_config)


@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
async def litellm_langchain(llm_config: LiteLlmModelConfig, _builder: Builder):

from langchain_litellm import ChatLiteLLM

client = ChatLiteLLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))

yield _patch_llm_based_on_config(client, llm_config)
1 change: 1 addition & 0 deletions packages/nvidia_nat_llama_index/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
"llama-index-embeddings-openai~=0.3.1",
"llama-index-llms-azure-openai~=0.3.2",
"llama-index-llms-bedrock~=0.3.8",
"llama-index-llms-litellm~=0.5.1",
"llama-index-llms-nvidia~=0.3.1",
"llama-index-llms-openai~=0.3.42",
"llama-index-readers-file~=0.4.4",
@@ -24,6 +24,7 @@
from nat.data_models.thinking_mixin import ThinkingMixin
from nat.llm.aws_bedrock_llm import AWSBedrockModelConfig
from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
from nat.llm.litellm_llm import LiteLlmModelConfig
from nat.llm.nim_llm import NIMModelConfig
from nat.llm.openai_llm import OpenAIModelConfig
from nat.llm.utils.thinking import BaseThinkingInjector
@@ -107,3 +108,13 @@ async def openai_llama_index(llm_config: OpenAIModelConfig, _builder: Builder):
llm = OpenAI(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))

yield _patch_llm_based_on_config(llm, llm_config)


@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
async def litellm_llama_index(llm_config: LiteLlmModelConfig, _builder: Builder):

from llama_index.llms.litellm import LiteLLM

llm = LiteLLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))

yield _patch_llm_based_on_config(llm, llm_config)
30 changes: 30 additions & 0 deletions uv.lock

Some generated files are not rendered by default.