From f139e04c4a717b443d05a53e33332da334718a6f Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 15:06:38 +0200 Subject: [PATCH 1/6] fix web_demo errors --- web_demo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/web_demo.py b/web_demo.py index 894aece..11d2162 100755 --- a/web_demo.py +++ b/web_demo.py @@ -118,8 +118,8 @@ [ [ "images/multi_image_1.jpeg", - "images/mi_2.jpeg", - "images/mi_3.jpeg" + "images/multi_image_2.jpeg", + "images/multi_image_3.jpeg" ], "能帮我用这几个食材做一道菜吗?", ] @@ -663,7 +663,8 @@ def format_examples(examples_list): demo.title = "DeepSeek-VL2 Chatbot" reload_javascript() - demo.queue(concurrency_count=CONCURRENT_COUNT, max_size=MAX_EVENTS).launch( + demo.queue(#concurrency_count=CONCURRENT_COUNT, #<- for some reason this emits an error! + max_size=MAX_EVENTS).launch( # share=False, share=True, favicon_path="deepseek_vl2/serve/assets/favicon.ico", From 5c818a06a4b4d319249956d3caa9efd3bb2e04eb Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 15:13:59 +0200 Subject: [PATCH 2/6] added a bash script to setup DeepSeek-VL2 in a python3 venv, including the extra steps I had to do in order for it to work --- scripts/linux_setup.sh | 68 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100755 scripts/linux_setup.sh diff --git a/scripts/linux_setup.sh b/scripts/linux_setup.sh new file mode 100755 index 0000000..4905dd5 --- /dev/null +++ b/scripts/linux_setup.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$DIR" +cd .. 
+ + +if [ -d venv/ ] +then +echo "Found a virtual environment" +source venv/bin/activate +else +echo "Creating a virtual environment" +#Simple dependency checker that will apt-get stuff if something is missing +# sudo apt-get install python3-venv python3-pip +SYSTEM_DEPENDENCIES="python3-venv python3-pip zip libhdf5-dev" + +for REQUIRED_PKG in $SYSTEM_DEPENDENCIES +do +PKG_OK=$(dpkg-query -W --showformat='${Status}\n' $REQUIRED_PKG|grep "install ok installed") +echo "Checking for $REQUIRED_PKG: $PKG_OK" +if [ "" = "$PKG_OK" ]; then + + echo "No $REQUIRED_PKG. Setting up $REQUIRED_PKG." + + #If this is uncommented then only packages that are missing will get prompted.. + #sudo apt-get --yes install $REQUIRED_PKG + + #if this is uncommented then if one package is missing then all missing packages are immediately installed.. + sudo apt-get install $SYSTEM_DEPENDENCIES + break +fi +done +#------------------------------------------------------------------------------ +python3 -m venv venv +source venv/bin/activate +fi + + +#git clone https://github.com/deepseek-ai/DeepSeek-VL2 +#cd DeepSeek-VL2 +#python3 -m venv venv +#source venv/bin/activate + + +#Make sure pip is up to date +python3 -m pip install --upgrade pip + +python3 -m pip install -e . 
+python3 -m pip install -e .[gradio] + +python3 -m pip install joblib wheel +python3 -m pip install flash-attn --no-build-isolation +python3 -m pip install xformers +python3 -m pip install --upgrade gradio + +#You can now run using : +#CUDA_VISIBLE_DEVICES=2 python3 web_demo.py --model_name "deepseek-ai/deepseek-vl2-tiny" --port 8080 + +echo "From now on you can run the web demo using: " +DEMO_DIR=`pwd` +echo "cd $DEMO_DIR" +echo "source venv/bin/activate" +echo "python3 web_demo.py --model_name \"deepseek-ai/deepseek-vl2-tiny\" --port 8080" + + +exit 0 + From db6c917643b749e84ca452dee57468c6c234e114 Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 15:56:35 +0200 Subject: [PATCH 3/6] add a client utility that can script questions to the web demo --- client.py | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100755 client.py diff --git a/client.py b/client.py new file mode 100755 index 0000000..6d72501 --- /dev/null +++ b/client.py @@ -0,0 +1,145 @@ +import json +import time +import sys +import os +from gradio_client import Client, handle_file + +# Replace with the actual server URL if different +ip = "127.0.0.1" +port = "8080" + +# Define the user prompt (caption) +user_prompt = "Thoroughly and carefully describe this image." 
+ +files = [] +output_file = "output.json" + +# Hyperparameters +temperature = 0.6 +top_k = 50 +top_p = 0.9 +max_tokens = 100 + +startAt = 0 + +argumentStart = 1 +if len(sys.argv) > 1: + for i in range(0, len(sys.argv)): + if sys.argv[i] == "--ip": + ip = sys.argv[i + 1] + argumentStart += 2 + if sys.argv[i] == "--directory": + directory = sys.argv[i + 1] + argumentStart += 2 + # Populate files with image (.jpg, .png) contents of directory + if os.path.isdir(directory): + directoryList = os.listdir(directory) + directoryList.sort() + for file in directoryList: + if file.lower().endswith(('.jpg', '.png', '.jpeg', '.txt')): + files.append(os.path.join(directory, file)) + else: + print(f"Error: Directory '{directory}' does not exist.") + sys.exit(1) + elif sys.argv[i] == "--start": + startAt = int(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] == "--port": + port = sys.argv[i + 1] + argumentStart += 2 + elif sys.argv[i] == "--prompt": + user_prompt = sys.argv[i + 1] + argumentStart += 2 + elif sys.argv[i] == "--temperature": + temperature = float(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] == "--top_k": + top_k = int(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] == "--top_p": + top_p = float(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] == "--max_tokens": + max_tokens = int(sys.argv[i + 1]) + argumentStart += 2 + elif sys.argv[i] in ("--output", "-o"): + output_file = sys.argv[i + 1] + argumentStart += 2 + +# Initialize the Gradio client with the server URL +client = Client(f"http://{ip}:{port}") + +results = {"prompt": user_prompt} + +for i in range(argumentStart, len(sys.argv)): + files.append(sys.argv[i]) + +# Make sure the list is sorted +files.sort() + +# Possibly start at a specific index +for i in range(startAt, len(files)): + # Grab the next image path + image_path = files[i] + + # Count start time + start = time.time() + + # Make query to VLLM + try: + imageFile = None + this_user_prompt = user_prompt + if 
image_path.endswith('.txt'): + with open(image_path, 'r') as txt_file: + this_user_prompt = txt_file.read().strip() + else: + imageFile = handle_file(image_path) + + # Send the image file path and the prompt to the Gradio app for processing + result = client.predict( + input_images=[imageFile], # Provide the file path directly + input_text=this_user_prompt, # Adapted prompt parameter + api_name="/transfer_input" + ) + + result = client.predict( + chatbot=[], + temperature=temperature, + #top_k=top_k, + top_p=top_p, + max_length_tokens=max_tokens, # Adapted max_tokens parameter + repetition_penalty=1.1, + max_context_length_tokens=4096, + model_select_dropdown="deepseek-ai/deepseek-vl2-tiny", + api_name="/predict" + ) + + + + except Exception as e: + print(f"Failed to complete job at index {i}: {e}") + output_file = f"partial_until_{i}_{output_file}" + break + + # Calculate elapsed time + seconds = time.time() - start + remaining = (len(files) - i) * seconds + hz = 1 / (seconds + 0.0001) + + # Output the result + print("result[0][0][1] ",result[0][0][1]) + question = this_user_prompt #Don't try to recover it from the list.. 
+ response = result[0][0][1] + + # Print on screen + print(f"Processing {1 + i}/{len(files)} | {hz:.2f} Hz / remaining {remaining / 60:.2f} minutes") + print(f"Image: {image_path}\nResponse: {response}") + + # Store each path as the key pointing to each description + results[image_path] = response + +# Save results to JSON +print(f"\n\n\nStoring results in JSON file {output_file}") +with open(output_file, "w") as outfile: + json.dump(results, outfile, indent=4) + From c59bfe31645d46a92117617cf68c0a93fc51fd08 Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 15:57:13 +0200 Subject: [PATCH 4/6] reduce console output --- client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client.py b/client.py index 6d72501..bc14c6d 100755 --- a/client.py +++ b/client.py @@ -127,7 +127,7 @@ hz = 1 / (seconds + 0.0001) # Output the result - print("result[0][0][1] ",result[0][0][1]) + #print("result[0][0][1] ",result[0][0][1]) question = this_user_prompt #Don't try to recover it from the list.. response = result[0][0][1] From 622c090548819988609bc45f3f98ed9e1c3d25a2 Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 16:02:59 +0200 Subject: [PATCH 5/6] now explicitly resetting state (is this needed?) --- client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/client.py b/client.py index bc14c6d..75d001a 100755 --- a/client.py +++ b/client.py @@ -95,6 +95,9 @@ else: imageFile = handle_file(image_path) + # Reset state + result = client.predict(api_name="/reset_state" ) + # Send the image file path and the prompt to the Gradio app for processing result = client.predict( input_images=[imageFile], # Provide the file path directly From a4126eca81179f2e442a01aea2925f6a8f5d6246 Mon Sep 17 00:00:00 2001 From: Ammar Qammaz Date: Tue, 7 Jan 2025 16:34:20 +0200 Subject: [PATCH 6/6] ... 
--- client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client.py b/client.py index 75d001a..e6cbc26 100755 --- a/client.py +++ b/client.py @@ -113,7 +113,7 @@ max_length_tokens=max_tokens, # Adapted max_tokens parameter repetition_penalty=1.1, max_context_length_tokens=4096, - model_select_dropdown="deepseek-ai/deepseek-vl2-tiny", + #model_select_dropdown="deepseek-ai/deepseek-vl2-tiny", api_name="/predict" )