From d5b7511a3c51937abf7b21402b826e28de58aabd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cihan=20Yal=C3=A7=C4=B1n?= <113353248+g-hano@users.noreply.github.com>
Date: Tue, 8 Oct 2024 03:21:35 +0300
Subject: [PATCH] MultiModal.HuggingFaceMultiModal: fix errors and README, add
 stream_complete (#16376)

fix imports
---
 .../README.md | 41 ++++++++++++++++---
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-huggingface/README.md b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-huggingface/README.md
index 4b698974d5ea3..10dcaea5bee98 100644
--- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-huggingface/README.md
+++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-huggingface/README.md
@@ -35,7 +35,7 @@ Here's a basic example of how to use the Hugging Face multimodal integration:
 
 ```python
 from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal
-from llama_index.schema import ImageDocument
+from llama_index.core.schema import ImageDocument
 
 # Initialize the model
 model = HuggingFaceMultiModal.from_model_name("Qwen/Qwen2-VL-2B-Instruct")
@@ -50,14 +50,45 @@ response = model.complete(prompt, image_documents=[image_document])
 print(response.text)
 ```
 
+### Streaming
+
+```python
+from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal
+from llama_index.core.schema import ImageDocument
+
+# Initialize the model
+model = HuggingFaceMultiModal.from_model_name("Qwen/Qwen2-VL-2B-Instruct")
+
+# Prepare your image and prompt
+image_document = ImageDocument(image_path="downloaded_image.jpg")
+prompt = "Describe this image in detail."
+
+import nest_asyncio
+import asyncio
+
+nest_asyncio.apply()
+
+
+async def stream_output():
+    for chunk in model.stream_complete(
+        prompt, image_documents=[image_document]
+    ):
+        print(chunk.delta, end="", flush=True)
+        await asyncio.sleep(0)
+
+
+asyncio.run(stream_output())
+```
+
 You can also refer to this [Colab notebook](examples\huggingface_multimodal.ipynb)
 
 ## Supported Models
 
-1. Qwen2VisionMultiModal
-2. Florence2MultiModal
-3. Phi35VisionMultiModal
-4. PaliGemmaMultiModal
+1. Qwen2 Vision
+2. Florence2
+3. Phi3.5 Vision
+4. PaliGemma
+5. Mllama
 
 Each model has its unique capabilities and can be selected based on your specific use case.
 
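
A side note on the streaming example this patch adds: it consumes `stream_complete` with a plain `for` loop inside the async wrapper, which implies `stream_complete` is a synchronous generator; the `asyncio`/`nest_asyncio` scaffolding is only needed when interleaving the stream with other coroutines (e.g., in a notebook event loop). A minimal sketch of direct synchronous consumption under that assumption, reusing the same model, image path, and `chunk.delta` attribute as the example in the patch — illustrative only, not part of the committed README:

```python
from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal
from llama_index.core.schema import ImageDocument

# Same setup as the README example in the patch above
model = HuggingFaceMultiModal.from_model_name("Qwen/Qwen2-VL-2B-Instruct")
image_document = ImageDocument(image_path="downloaded_image.jpg")

# Assumption: stream_complete is a synchronous generator, as implied by the
# plain `for` loop in the patch; each chunk's .delta holds only the new text.
for chunk in model.stream_complete(
    "Describe this image in detail.", image_documents=[image_document]
):
    print(chunk.delta, end="", flush=True)
print()  # final newline after the streamed output
```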