From f139e04c4a717b443d05a53e33332da334718a6f Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 15:06:38 +0200 Subject: [PATCH 1/6] fix web_demo errors --- web_demo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/web_demo.py b/web_demo.py index 894aece..11d2162 100755 --- a/web_demo.py +++ b/web_demo.py @@ -118,8 +118,8 @@ [ [ "images/multi_image_1.jpeg", - "images/mi_2.jpeg", - "images/mi_3.jpeg" + "images/multi_image_2.jpeg", + "images/multi_image_3.jpeg" ], "能帮我用这几个食材做一道菜吗?", ] @@ -663,7 +663,8 @@ def format_examples(examples_list): demo.title = "DeepSeek-VL2 Chatbot" reload_javascript() - demo.queue(concurrency_count=CONCURRENT_COUNT, max_size=MAX_EVENTS).launch( + demo.queue(#concurrency_count=CONCURRENT_COUNT, #<- for some reason this emits an error! + max_size=MAX_EVENTS).launch( # share=False, share=True, favicon_path="deepseek_vl2/serve/assets/favicon.ico", From 5c818a06a4b4d319249956d3caa9efd3bb2e04eb Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 15:13:59 +0200 Subject: [PATCH 2/6] added a bash script to setup DeepSeek-VL2 in a python3 venv, including the extra steps I had to do in order for it to work --- scripts/linux_setup.sh | 68 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100755 scripts/linux_setup.sh diff --git a/scripts/linux_setup.sh b/scripts/linux_setup.sh new file mode 100755 index 0000000..4905dd5 --- /dev/null +++ b/scripts/linux_setup.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$DIR" +cd .. 
+ + +if [ -d venv/ ] +then +echo "Found a virtual environment" +source venv/bin/activate +else +echo "Creating a virtual environment" +#Simple dependency checker that will apt-get stuff if something is missing +# sudo apt-get install python3-venv python3-pip +SYSTEM_DEPENDENCIES="python3-venv python3-pip zip libhdf5-dev" + +for REQUIRED_PKG in $SYSTEM_DEPENDENCIES +do +PKG_OK=$(dpkg-query -W --showformat='${Status}\n' $REQUIRED_PKG|grep "install ok installed") +echo "Checking for $REQUIRED_PKG: $PKG_OK" +if [ "" = "$PKG_OK" ]; then + + echo "No $REQUIRED_PKG. Setting up $REQUIRED_PKG." + + #If this is uncommented then only packages that are missing will get prompted.. + #sudo apt-get --yes install $REQUIRED_PKG + + #if this is uncommented then if one package is missing then all missing packages are immediately installed.. + sudo apt-get install $SYSTEM_DEPENDENCIES + break +fi +done +#------------------------------------------------------------------------------ +python3 -m venv venv +source venv/bin/activate +fi + + +#git clone https://github.com/deepseek-ai/DeepSeek-VL2 +#cd DeepSeek-VL2 +#python3 -m venv venv +#source venv/bin/activate + + +#Make sure pip is up to date +python3 -m pip install --upgrade pip + +python3 -m pip install -e . 
+python3 -m pip install -e .[gradio] + +python3 -m pip install joblib wheel +python3 -m pip install flash-attn --no-build-isolation +python3 -m pip install xformers +python3 -m pip install --upgrade gradio + +#You can now run using : +#CUDA_VISIBLE_DEVICES=2 python3 web_demo.py --model_name "deepseek-ai/deepseek-vl2-tiny" --port 8080 + +echo "From now on you can run the web demo using: " +DEMO_DIR=`pwd` +echo "cd $DEMO_DIR" +echo "source venv/bin/activate" +echo "python3 web_demo.py --model_name \"deepseek-ai/deepseek-vl2-tiny\" --port 8080" + + +exit 0 + From db6c917643b749e84ca452dee57468c6c234e114 Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 15:56:35 +0200 Subject: [PATCH 3/6] add a client utility that can script questions to the web demo --- client.py | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100755 client.py diff --git a/client.py b/client.py new file mode 100755 index 0000000..6d72501 --- /dev/null +++ b/client.py @@ -0,0 +1,145 @@ +import json +import time +import sys +import os +from gradio_client import Client, handle_file + +# Replace with the actual server URL if different +ip = "127.0.0.1" +port = "8080" + +# Define the user prompt (caption) +user_prompt = "Thoroughly and carefully describe this image." 
+ +files = [] +output_file = "output.json" + +# Hyperparameters +temperature = 0.6 +top_k = 50 +top_p = 0.9 +max_tokens = 100 + +startAt = 0 + +argumentStart = 1 +if len(sys.argv) > 1: + for i in range(0, len(sys.argv)): + if sys.argv[i] == "--ip": + ip = sys.argv[i + 1] + argumentStart += 2 + if sys.argv[i] == "--directory": + directory = sys.argv[i + 1] + argumentStart += 2 + # Populate files with image (.jpg, .png) contents of directory + if os.path.isdir(directory): + directoryList = os.listdir(directory) + directoryList.sort() + for file in directoryList: + if file.lower().endswith(('.jpg', '.png', '.jpeg', '.txt')): + files.append(os.path.join(directory, file)) + else: + print(f"Error: Directory '{directory}' does not exist.") + sys.exit(1) + elif sys.argv[i] == "--start": + startAt = int(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] == "--port": + port = sys.argv[i + 1] + argumentStart += 2 + elif sys.argv[i] == "--prompt": + user_prompt = sys.argv[i + 1] + argumentStart += 2 + elif sys.argv[i] == "--temperature": + temperature = float(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] == "--top_k": + top_k = int(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] == "--top_p": + top_p = float(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] == "--max_tokens": + max_tokens = int(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] in ("--output", "-o"): + output_file = sys.argv[i + 1] + argumentStart += 2 + +# Initialize the Gradio client with the server URL +client = Client(f"http://{ip}:{port}") + +results = {"prompt": user_prompt} + +for i in range(argumentStart, len(sys.argv)): + files.append(sys.argv[i]) + +# Make sure the list is sorted +files.sort() + +# Possibly start at a specific index +for i in range(startAt, len(files)): + # Grab the next image path + image_path = files[i] + + # Count start time + start = time.time() + + # Make query to VLLM + try: + imageFile = None + this_user_prompt = user_prompt + if 
image_path.endswith('.txt'): + with open(image_path, 'r') as txt_file: + this_user_prompt = txt_file.read().strip() + else: + imageFile = handle_file(image_path) + + # Send the image file path and the prompt to the Gradio app for processing + result = client.predict( + input_images=[imageFile], # Provide the file path directly + input_text=this_user_prompt, # Adapted prompt parameter + api_name="/transfer_input" + ) + + result = client.predict( + chatbot=[], + temperature=temperature, + #top_k=top_k, + top_p=top_p, + max_length_tokens=max_tokens, # Adapted max_tokens parameter + repetition_penalty=1.1, + max_context_length_tokens=4096, + model_select_dropdown="deepseek-ai/deepseek-vl2-tiny", + api_name="/predict" + ) + + + + except Exception as e: + print(f"Failed to complete job at index {i}: {e}") + output_file = f"partial_until_{i}_{output_file}" + break + + # Calculate elapsed time + seconds = time.time() - start + remaining = (len(files) - i) * seconds + hz = 1 / (seconds + 0.0001) + + # Output the result + print("result[0][0][1] ",result[0][0][1]) + question = this_user_prompt #Don't try to recover it from the list.. 
+ response = result[0][0][1] + + # Print on screen + print(f"Processing {1 + i}/{len(files)} | {hz:.2f} Hz / remaining {remaining / 60:.2f} minutes") + print(f"Image: {image_path}\nResponse: {response}") + + # Store each path as the key pointing to each description + results[image_path] = response + +# Save results to JSON +print(f"\n\n\nStoring results in JSON file {output_file}") +with open(output_file, "w") as outfile: + json.dump(results, outfile, indent=4) + From c59bfe31645d46a92117617cf68c0a93fc51fd08 Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 15:57:13 +0200 Subject: [PATCH 4/6] reduce console output --- client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client.py b/client.py index 6d72501..bc14c6d 100755 --- a/client.py +++ b/client.py @@ -127,7 +127,7 @@ hz = 1 / (seconds + 0.0001) # Output the result - print("result[0][0][1] ",result[0][0][1]) + #print("result[0][0][1] ",result[0][0][1]) question = this_user_prompt #Don't try to recover it from the list.. response = result[0][0][1] From 622c090548819988609bc45f3f98ed9e1c3d25a2 Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 16:02:59 +0200 Subject: [PATCH 5/6] now explicitly resetting state (is this needed?) --- client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/client.py b/client.py index bc14c6d..75d001a 100755 --- a/client.py +++ b/client.py @@ -95,6 +95,9 @@ else: imageFile = handle_file(image_path) + # Reset state + result = client.predict(api_name="/reset_state" ) + # Send the image file path and the prompt to the Gradio app for processing result = client.predict( input_images=[imageFile], # Provide the file path directly From a4126eca81179f2e442a01aea2925f6a8f5d6246 Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 16:34:20 +0200 Subject: [PATCH 6/6] ... 
--- client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client.py b/client.py index 75d001a..e6cbc26 100755 --- a/client.py +++ b/client.py @@ -113,7 +113,7 @@ max_length_tokens=max_tokens, # Adapted max_tokens parameter repetition_penalty=1.1, max_context_length_tokens=4096, - model_select_dropdown="deepseek-ai/deepseek-vl2-tiny", + #model_select_dropdown="deepseek-ai/deepseek-vl2-tiny", api_name="/predict" )