diff --git a/.gitignore b/.gitignore
index 76edf0916..60f358611 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 ###### Place new entries directly below this line! ######
+examples/rag_library_mode/rag_library_mode/src/rag_library_mode/deploy/volumes/*
 
 # auto-generated chainlit stuff
 chainlit.md
diff --git a/examples/rag_lib/README.md b/examples/rag_lib/README.md
new file mode 100644
index 000000000..c0db83700
--- /dev/null
+++ b/examples/rag_lib/README.md
@@ -0,0 +1,80 @@
+# NVIDIA RAG Python Package Usage Guide
+
+This guide demonstrates how to use a NAT agent with the NVIDIA RAG Python client as a tool.
+## Get Started With NVIDIA RAG Blueprint
+
+Clone the RAG repo from here: https://github.com/NVIDIA-AI-Blueprints/rag
+
+Install the RAG Library using one of the following options:
+```bash
+# (Option 1) Build the wheel from source and install the NVIDIA RAG wheel
+uv build
+uv pip install "dist/nvidia_rag-2.2.1-py3-none-any.whl[all]"
+
+# (Option 2) Install the package in editable (development) mode from source
+uv pip install -e ".[all]"
+
+# (Option 3) Install the prebuilt wheel from PyPI. This does not require you to clone the repo.
+uv pip install "nvidia-rag[all]"
+```
+Open the library usage guide notebook at https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/rag_library_usage.ipynb and follow the steps to deploy your RAG server and ingest your documents. Skip the installation steps, because the library is already installed.
+
+An example file that you can ingest is provided under `nemo-agent-toolkit/examples/rag_lib/data/cuda.txt`.
+
+#### Configure Your Agent
+
+Configure your agent to use the Milvus collections for RAG. A pre-configured configuration file is provided in `examples/rag_lib/src/rag_lib/configs/config.yml`. You can modify this file to point to your Milvus instance and collections, or to add tools to your agent. By default, the agent is a `tool_calling_agent` that interacts with the retriever component. You can also switch to another of the NeMo Agent toolkit pre-built agent implementations, such as the `react_agent`. The configuration file is shown below.
+
+```yaml
+general:
+  use_uvloop: true
+
+
+functions:
+  rag_tool:
+    _type: rag_lib
+    base_url: "http://localhost:19530"
+    vdb_top_k: 20
+    reranker_top_k: 10
+    collection_names: ["test_library"]
+    topic: Retrieve relevant documents from the database relevant to the query
+
+
+llms:
+  nim_llm:
+    _type: nim
+    model_name: meta/llama-3.3-70b-instruct
+    temperature: 0
+    max_tokens: 4096
+    top_p: 1
+
+
+workflow:
+  _type: tool_calling_agent
+  tool_names:
+  - rag_tool
+  verbose: true
+  llm_name: nim_llm
+```
+
+If you have a different Milvus instance or collection names, you can modify the `base_url` and the `collection_names` in the config file to point to your instance and collections.
+You can also modify the retrieval parameters, such as `vdb_top_k` and `reranker_top_k`.
+You can also add additional functions as tools for your agent in the `functions` section.
+
+#### Install the Workflow
+```bash
+uv pip install -e examples/rag_lib
+```
+
+#### Run the Workflow
+
+```bash
+nat run --config_file examples/rag_lib/src/rag_lib/configs/config.yml --input "How do I install CUDA"
+```
+
+The expected workflow result of running the above command is:
+```console
+['To install CUDA, you typically need to: \n1. Verify you have a CUDA-capable GPU and a supported version of your operating system.\n2. Download the NVIDIA CUDA Toolkit from the official NVIDIA website.\n3. Choose an installation method, such as a local repository installation or a network repository installation, depending on your system.\n4. Follow the specific instructions for your operating system, which may include installing local repository packages, enabling network repositories, or running installer scripts.\n5. Reboot your system and perform post-installation actions, such as setting up your environment and verifying the installation by running sample projects. \n\nPlease refer to the official NVIDIA CUDA documentation for detailed instructions tailored to your specific operating system and distribution.']
+```
+
+
diff --git a/examples/rag_lib/data/cuda.txt b/examples/rag_lib/data/cuda.txt
new file mode 100644
index 000000000..e2b285f57
--- /dev/null
+++ b/examples/rag_lib/data/cuda.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c75e2fd240e1419b246d89b2fa4e586273c42e96f1a42550a8b06a0abd79024
+size 1653
diff --git a/examples/rag_lib/pyproject.toml b/examples/rag_lib/pyproject.toml
new file mode 100644
index 000000000..e64b296db
--- /dev/null
+++ b/examples/rag_lib/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+build-backend = "setuptools.build_meta"
+requires = ["setuptools >= 64", "setuptools-scm>=8"]
+
+[tool.setuptools_scm]
+# NAT uses the --first-parent flag to avoid tags from previous releases which have been merged into the develop branch
+# from causing an unexpected version change. This can be safely removed if developing outside of the NAT repository.
+git_describe_command = "git describe --long --first-parent"
+root = "../.."
+
+[project]
+name = "rag_lib"
+dynamic = ["version"]
+dependencies = [
+  "nvidia-nat[langchain]~=0.1",
+]
+requires-python = ">=3.11,<3.13"
+description = "Custom NeMo Agent Toolkit Workflow"
+classifiers = ["Programming Language :: Python"]
+
+[tool.uv.sources]
+nvidia-nat = { path = "../..", editable = true }
+
+[project.entry-points.'nat.components']
+rag_lib = "rag_lib.register"
\ No newline at end of file
diff --git a/examples/rag_lib/src/rag_lib/__init__.py b/examples/rag_lib/src/rag_lib/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/rag_lib/src/rag_lib/configs/config.yml b/examples/rag_lib/src/rag_lib/configs/config.yml
new file mode 100644
index 000000000..caa70cc89
--- /dev/null
+++ b/examples/rag_lib/src/rag_lib/configs/config.yml
@@ -0,0 +1,29 @@
+general:
+  use_uvloop: true
+
+# Tools exposed to the agent; `_type: rag_lib` selects the function registered for RagLibFunctionConfig.
+functions:
+  rag_tool:
+    _type: rag_lib
+    base_url: "http://localhost:19530"  # NOTE(review): declared in RagLibFunctionConfig but not read by rag_lib_function
+    vdb_top_k: 20  # passed to NvidiaRAG.search as vdb_top_k
+    reranker_top_k: 10  # passed to NvidiaRAG.search as reranker_top_k
+    collection_names: ["test_library"]  # collections passed to NvidiaRAG.search
+    topic: Retrieve relevant documents from the database relevant to the query  # NOTE(review): no `topic` field is declared in RagLibFunctionConfig -- confirm it is accepted
+
+# LLM definitions; the workflow below refers to `nim_llm` through `llm_name`.
+llms:
+  nim_llm:
+    _type: nim
+    model_name: meta/llama-3.3-70b-instruct
+    temperature: 0
+    max_tokens: 4096
+    top_p: 1
+
+# Agent workflow: a tool-calling agent wired to `rag_tool` and `nim_llm`.
+workflow:
+  _type: tool_calling_agent
+  tool_names:
+   - rag_tool
+  verbose: true
+  llm_name: nim_llm
diff --git a/examples/rag_lib/src/rag_lib/rag_lib_function.py b/examples/rag_lib/src/rag_lib/rag_lib_function.py
new file mode 100644
index 000000000..d6817184c
--- /dev/null
+++ b/examples/rag_lib/src/rag_lib/rag_lib_function.py
@@ -0,0 +1,70 @@
+import logging
+
+from pydantic import Field
+
+from nat.builder.builder import Builder
+from nat.builder.function_info import FunctionInfo
+from nat.cli.register_workflow import register_function
+from nat.data_models.function import FunctionBaseConfig
+
+from nvidia_rag import NvidiaRAG, NvidiaRAGIngestor
+
+import json
+import base64
+from IPython.display import display, Image, Markdown
+
+
+logger = logging.getLogger(__name__)
+
+
+class RagLibFunctionConfig(FunctionBaseConfig, name="rag_lib"):
+    """
+    This tool retrieves relevant documents for a given user query. The input query is mapped to the most appropriate
+    Milvus collection database. This will return relevant documents from the selected collection.
+    """
+    base_url: str = Field(description="The base url used to connect to the milvus database.")
+    reranker_top_k: int = Field(default=100, description="The number of results to return from the milvus database.")
+    vdb_top_k: int = Field(default=10, description="The number of results to return from the milvus database.")
+    collection_names: list = Field(default=["cuda_docs"],
+                                   description="The list of available collection names.")
+    
+
+
+@register_function(config_type=RagLibFunctionConfig)
+async def rag_lib_function(
+    config: RagLibFunctionConfig, builder: Builder
+):
+
+    def parse_search_citations(citations):
+
+        parsed_docs = []
+        
+        for idx, citation in enumerate(citations.results):
+        # If using pydantic models, citation fields may be attributes, not dict keys
+            content = getattr(citation, 'content', '')
+            doc_name = getattr(citation, 'document_name', f'Citation {idx+1}')
+            parsed_document = f'<Document source="{doc_name}"/>\n{content}\n</Document>'
+            parsed_docs.append(parsed_document)
+
+            # combine parsed documents into a single string
+            internal_search_docs = "\n\n---\n\n".join(parsed_docs)
+            return internal_search_docs
+
+    async def _response_fn(query: str) -> str:
+        # Process the input_message and generate output
+
+        rag = NvidiaRAG()
+        
+        return parse_search_citations(rag.search(
+            query=f"{query}",
+            collection_names=config.collection_names,
+            reranker_top_k=config.reranker_top_k,
+            vdb_top_k=config.vdb_top_k,
+        ))  
+
+    try:
+        yield FunctionInfo.create(single_fn=_response_fn)
+    except GeneratorExit:
+        logger.warning("Function exited early!")
+    finally:
+        logger.info("Cleaning up rag_lib_mode workflow.")
\ No newline at end of file
diff --git a/examples/rag_lib/src/rag_lib/register.py b/examples/rag_lib/src/rag_lib/register.py
new file mode 100644
index 000000000..0bff1ab8c
--- /dev/null
+++ b/examples/rag_lib/src/rag_lib/register.py
@@ -0,0 +1,4 @@
+# flake8: noqa
+
+# Import any tools which need to be automatically registered here
+from rag_lib import rag_lib_function
\ No newline at end of file