diff --git a/docs/source/workflows/llms/index.md b/docs/source/workflows/llms/index.md
index f232867e5..280afb774 100644
--- a/docs/source/workflows/llms/index.md
+++ b/docs/source/workflows/llms/index.md
@@ -26,6 +26,7 @@ NVIDIA NeMo Agent toolkit supports the following LLM providers:
 | [OpenAI](https://openai.com) | `openai` | OpenAI API |
 | [AWS Bedrock](https://aws.amazon.com/bedrock/) | `aws_bedrock` | AWS Bedrock API |
 | [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/quickstart) | `azure_openai` | Azure OpenAI API |
+| [LiteLLM](https://github.com/BerriAI/litellm) | `litellm` | LiteLLM API |
 
 ## LLM Configuration
 
@@ -47,6 +48,9 @@ llms:
   azure_openai_llm:
     _type: azure_openai
     azure_deployment: gpt-4o-mini
+  litellm_llm:
+    _type: litellm
+    model_name: gpt-4o
 ```
 
 ### NVIDIA NIM
@@ -128,6 +132,22 @@ The Azure OpenAI LLM provider is defined by the {py:class}`~nat.llm.azure_openai
 `temperature` is model-gated and may not be supported by all models. See [Gated Fields](../../extend/gated-fields.md) for details.
 :::
 
+### LiteLLM
+
+LiteLLM is a general-purpose LLM provider that can be used with any model provider supported by LiteLLM.
+See the [LiteLLM provider documentation](https://docs.litellm.ai/docs/providers) for more information on how to use LiteLLM.
+
+The LiteLLM LLM provider is defined by the {py:class}`~nat.llm.litellm_llm.LiteLlmModelConfig` class.
+
+* `model_name` - The name of the model to use (dependent on the model provider)
+* `api_key` - The API key to use for the model (dependent on the model provider)
+* `base_url` - The base URL to use for the model
+* `seed` - The seed to use for the model
+* `temperature` - The temperature to use for the model
+* `top_p` - The top-p value to use for the model
+* `max_retries` - The maximum number of retries for the request
+
+
 ## Testing Provider
 ### `nat_test_llm`
 `nat_test_llm` is a development and testing provider intended for examples and CI. It is not intended for production use.
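For reference, a fuller `llms` entry that exercises the documented `LiteLlmModelConfig` fields might look like the following sketch. It is a hypothetical configuration, not part of this change: the model name, API key, and base URL are placeholders for whichever provider LiteLLM routes to, and the optional fields can be omitted.

```yaml
llms:
  litellm_llm:
    _type: litellm
    # Model names are provider-specific; this value is a placeholder.
    model_name: gpt-4o
    # Placeholder credentials and endpoint for the chosen provider.
    api_key: <your-api-key>
    base_url: https://api.openai.com/v1
    # Optional sampling and retry controls from the field list above.
    seed: 42
    temperature: 0.7
    top_p: 0.9
    max_retries: 3
```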
diff --git a/packages/nvidia_nat_agno/src/nat/plugins/agno/llm.py b/packages/nvidia_nat_agno/src/nat/plugins/agno/llm.py
index 2b1e8b49b..18a6d3726 100644
--- a/packages/nvidia_nat_agno/src/nat/plugins/agno/llm.py
+++ b/packages/nvidia_nat_agno/src/nat/plugins/agno/llm.py
@@ -21,6 +21,7 @@
 from nat.data_models.llm import LLMBaseConfig
 from nat.data_models.retry_mixin import RetryMixin
 from nat.data_models.thinking_mixin import ThinkingMixin
+from nat.llm.litellm_llm import LiteLlmModelConfig
 from nat.llm.nim_llm import NIMModelConfig
 from nat.llm.openai_llm import OpenAIModelConfig
 from nat.llm.utils.thinking import BaseThinkingInjector
@@ -99,3 +100,20 @@ async def openai_agno(llm_config: OpenAIModelConfig, _builder: Builder):
     client = OpenAIChat(**config_obj, id=llm_config.model_name)
 
     yield _patch_llm_based_on_config(client, llm_config)
+
+
+@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.AGNO)
+async def litellm_agno(llm_config: LiteLlmModelConfig, _builder: Builder):
+
+    from agno.models.litellm.chat import LiteLLM
+
+    client = LiteLLM(
+        **llm_config.model_dump(
+            exclude={"type", "thinking", "model_name"},
+            by_alias=True,
+            exclude_none=True,
+        ),
+        id=llm_config.model_name,
+    )
+
+    yield _patch_llm_based_on_config(client, llm_config)
diff --git a/packages/nvidia_nat_crewai/src/nat/plugins/crewai/llm.py b/packages/nvidia_nat_crewai/src/nat/plugins/crewai/llm.py
index 078107acc..e56b80329 100644
--- a/packages/nvidia_nat_crewai/src/nat/plugins/crewai/llm.py
+++ b/packages/nvidia_nat_crewai/src/nat/plugins/crewai/llm.py
@@ -23,6 +23,7 @@
 from nat.data_models.retry_mixin import RetryMixin
 from nat.data_models.thinking_mixin import ThinkingMixin
 from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
+from nat.llm.litellm_llm import LiteLlmModelConfig
 from nat.llm.nim_llm import NIMModelConfig
 from nat.llm.openai_llm import OpenAIModelConfig
 from nat.llm.utils.thinking import BaseThinkingInjector
@@ -126,3 +127,13 @@ async def openai_crewai(llm_config: OpenAIModelConfig, _builder: Builder):
     client = LLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
 
     yield _patch_llm_based_on_config(client, llm_config)
+
+
+@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.CREWAI)
+async def litellm_crewai(llm_config: LiteLlmModelConfig, _builder: Builder):
+
+    from crewai import LLM
+
+    client = LLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
+
+    yield _patch_llm_based_on_config(client, llm_config)
diff --git a/packages/nvidia_nat_langchain/pyproject.toml b/packages/nvidia_nat_langchain/pyproject.toml
index 54f597345..47c3d29d7 100644
--- a/packages/nvidia_nat_langchain/pyproject.toml
+++ b/packages/nvidia_nat_langchain/pyproject.toml
@@ -23,6 +23,7 @@ dependencies = [
   "nvidia-nat~=1.3",
   "langchain-aws~=0.2.31",
   "langchain-core~=0.3.75",
+  "langchain-litellm~=0.2.3",
   "langchain-milvus~=0.2.1",
   "langchain-nvidia-ai-endpoints~=0.3.17",
   "langchain-openai~=0.3.32",
diff --git a/packages/nvidia_nat_langchain/src/nat/plugins/langchain/llm.py b/packages/nvidia_nat_langchain/src/nat/plugins/langchain/llm.py
index ea7a12b15..527f28beb 100644
--- a/packages/nvidia_nat_langchain/src/nat/plugins/langchain/llm.py
+++ b/packages/nvidia_nat_langchain/src/nat/plugins/langchain/llm.py
@@ -24,6 +24,7 @@
 from nat.data_models.thinking_mixin import ThinkingMixin
 from nat.llm.aws_bedrock_llm import AWSBedrockModelConfig
 from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
+from nat.llm.litellm_llm import LiteLlmModelConfig
 from nat.llm.nim_llm import NIMModelConfig
 from nat.llm.openai_llm import OpenAIModelConfig
 from nat.llm.utils.thinking import BaseThinkingInjector
@@ -154,3 +155,13 @@ async def openai_langchain(llm_config: OpenAIModelConfig, _builder: Builder):
     ))
 
     yield _patch_llm_based_on_config(client, llm_config)
+
+
+@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+async def litellm_langchain(llm_config: LiteLlmModelConfig, _builder: Builder):
+
+    from langchain_litellm import ChatLiteLLM
+
+    client = ChatLiteLLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
+
+    yield _patch_llm_based_on_config(client, llm_config)
diff --git a/packages/nvidia_nat_llama_index/pyproject.toml b/packages/nvidia_nat_llama_index/pyproject.toml
index 3e8e66eb5..5f51b4025 100644
--- a/packages/nvidia_nat_llama_index/pyproject.toml
+++ b/packages/nvidia_nat_llama_index/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
   "llama-index-embeddings-openai~=0.3.1",
   "llama-index-llms-azure-openai~=0.3.2",
   "llama-index-llms-bedrock~=0.3.8",
+  "llama-index-llms-litellm~=0.5.1",
   "llama-index-llms-nvidia~=0.3.1",
   "llama-index-llms-openai~=0.3.42",
   "llama-index-readers-file~=0.4.4",
diff --git a/packages/nvidia_nat_llama_index/src/nat/plugins/llama_index/llm.py b/packages/nvidia_nat_llama_index/src/nat/plugins/llama_index/llm.py
index 42d788e55..c1e9b4638 100644
--- a/packages/nvidia_nat_llama_index/src/nat/plugins/llama_index/llm.py
+++ b/packages/nvidia_nat_llama_index/src/nat/plugins/llama_index/llm.py
@@ -24,6 +24,7 @@
 from nat.data_models.thinking_mixin import ThinkingMixin
 from nat.llm.aws_bedrock_llm import AWSBedrockModelConfig
 from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
+from nat.llm.litellm_llm import LiteLlmModelConfig
 from nat.llm.nim_llm import NIMModelConfig
 from nat.llm.openai_llm import OpenAIModelConfig
 from nat.llm.utils.thinking import BaseThinkingInjector
@@ -107,3 +108,13 @@ async def openai_llama_index(llm_config: OpenAIModelConfig, _builder: Builder):
     llm = OpenAI(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
 
     yield _patch_llm_based_on_config(llm, llm_config)
+
+
+@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def litellm_llama_index(llm_config: LiteLlmModelConfig, _builder: Builder):
+
+    from llama_index.llms.litellm import LiteLLM
+
+    llm = LiteLLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
+
+    yield _patch_llm_based_on_config(llm, llm_config)
diff --git a/src/nat/cli/main.py b/src/nat/cli/main.py
index 009840a39..8a3b08015 100644
--- a/src/nat/cli/main.py
+++ b/src/nat/cli/main.py
@@ -30,6 +30,9 @@ def run_cli():
     import os
     import sys
 
+    # Suppress warnings from transformers
+    os.environ["TRANSFORMERS_VERBOSITY"] = "error"
+
     parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
 
     if (parent_dir not in sys.path):
diff --git a/uv.lock b/uv.lock
index 995d06e1b..71be25701 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3909,6 +3909,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/77/b5/501c0ffcb09c734457ceaa86bc7b1dd37b6a261147bd653add03b838aacb/langchain_core-0.3.76-py3-none-any.whl", hash = "sha256:46e0eb48c7ac532432d51f8ca1ece1804c82afe9ae3dcf027b867edadf82b3ec", size = 447508, upload-time = "2025-09-10T14:49:38.179Z" },
 ]
 
+[[package]]
+name = "langchain-litellm"
+version = "0.2.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "langchain-core" },
+    { name = "litellm" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/40/8f/08032033cd4bdff1d177d6a9e3a1021e47c4c63fd1d8c564af6f3c7e9f8d/langchain_litellm-0.2.3.tar.gz", hash = "sha256:0e11687373ae6a99efee5a04d3a76de4fab0e1459edc0e84adb6f60ca76ebf79", size = 10829, upload-time = "2025-09-25T11:01:41.295Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/26/271b1dad80b39a0e9df7ab13f63fa3fad52ce8288ddf73dec32a2212219f/langchain_litellm-0.2.3-py3-none-any.whl", hash = "sha256:422254b8742893aed6380f5ee73e6ae77869b218758edd0888d14ebd2c439352", size = 11571, upload-time = "2025-09-25T11:01:40.183Z" },
+]
+
 [[package]]
 name = "langchain-milvus"
 version = "0.2.1"
@@ -4376,6 +4389,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ff/2f/7fc5206467151f64764bae61abd0fbbb8392fe84def15b1467f7fb174d7b/llama_index_llms_bedrock-0.3.8-py3-none-any.whl", hash = "sha256:58b804a206146bd7228590a4ee92ce13806a21040d92cb61e3046f2ee64f66cd", size = 11516, upload-time = "2025-03-26T16:15:07.722Z" },
 ]
 
+[[package]]
+name = "llama-index-llms-litellm"
+version = "0.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "litellm" },
+    { name = "llama-index-core" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/4d/13/64d3d2db8924eec085c05addaf8279f0ba95b28820ea4f035d0ba77cf711/llama_index_llms_litellm-0.5.1.tar.gz", hash = "sha256:b99460b0d1ef7cf48e02d139dc5358e6818a80b95633ba67d8438c73e180f5ef", size = 10611, upload-time = "2025-06-04T11:53:39.934Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7c/df/87bb97f75390aff4a4a479c329d27dd27d6069451574176eaaf837f67e07/llama_index_llms_litellm-0.5.1-py3-none-any.whl", hash = "sha256:f63c8384d051a9983a86e6f00f79cd9d6a5b81a035a4cff7c1548c3ff4b4d7f0", size = 10881, upload-time = "2025-06-04T11:53:38.619Z" },
+]
+
 [[package]]
 name = "llama-index-llms-nvidia"
 version = "0.3.3"
@@ -6362,6 +6388,7 @@ source = { editable = "packages/nvidia_nat_langchain" }
 dependencies = [
     { name = "langchain-aws" },
     { name = "langchain-core" },
+    { name = "langchain-litellm" },
     { name = "langchain-milvus" },
     { name = "langchain-nvidia-ai-endpoints" },
     { name = "langchain-openai" },
@@ -6374,6 +6401,7 @@
 requires-dist = [
     { name = "langchain-aws", specifier = "~=0.2.31" },
     { name = "langchain-core", specifier = "~=0.3.75" },
+    { name = "langchain-litellm", specifier = "~=0.2.3" },
     { name = "langchain-milvus", specifier = "~=0.2.1" },
     { name = "langchain-nvidia-ai-endpoints", specifier = "~=0.3.17" },
     { name = "langchain-openai", specifier = "~=0.3.32" },
@@ -6393,6 +6421,7 @@ dependencies = [
     { name = "llama-index-embeddings-openai" },
     { name = "llama-index-llms-azure-openai" },
     { name = "llama-index-llms-bedrock" },
+    { name = "llama-index-llms-litellm" },
     { name = "llama-index-llms-nvidia" },
     { name = "llama-index-llms-openai" },
     { name = "llama-index-readers-file" },
@@ -6408,6 +6437,7 @@ requires-dist = [
     { name = "llama-index-embeddings-openai", specifier = "~=0.3.1" },
     { name = "llama-index-llms-azure-openai", specifier = "~=0.3.2" },
     { name = "llama-index-llms-bedrock", specifier = "~=0.3.8" },
+    { name = "llama-index-llms-litellm", specifier = "~=0.5.1" },
    { name = "llama-index-llms-nvidia", specifier = "~=0.3.1" },
    { name = "llama-index-llms-openai", specifier = "~=0.3.42" },
    { name = "llama-index-readers-file", specifier = "~=0.4.4" },