Commit 6b62d08 (v3.0.0)

Parent: 116b7e3

File tree

7 files changed: +175 −21 lines changed

- .env
- README.md
- compose.yaml
- docker/Dockerfile
- docker/llama_cpp_int.py
- docker/retrieval_text_generation.py
- docker/select_and_run.py

.env

+6 −1

```diff
@@ -1,2 +1,7 @@
 VOLUME="/source/local-machine/dir:target/multi-container/app/dir"
-# VOLUME="c:/Users/User/:/User" e.g.
+# VOLUME="c:/Users/User/:/User" e.g.
+MODELS_PATH="/path/to/gguf/models"
+# MODELS_PATH="c:/Users/User/.cache/llama.cpp/"
+MODEL="your_favorite_model.gguf"
+# MODEL="stories260K.gguf"
+MAX_TOKENS="512"
```
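A wrong `MODELS_PATH` or a misspelled `MODEL` only surfaces once the llama.cpp container tries to load the file, so a pre-flight check can save a restart cycle. A minimal sketch (not part of the commit; stdlib only, assuming `.env` sits in the repo root):

```python
# check_env.py - hypothetical pre-flight check, not shipped with the repo.
# Parses .env and verifies that the configured GGUF model file exists.
from pathlib import Path

env = {}
for line in Path(".env").read_text().splitlines():
    line = line.strip()
    if line and not line.startswith("#") and "=" in line:
        key, _, value = line.partition("=")
        env[key.strip()] = value.strip().strip('"')

model_file = Path(env["MODELS_PATH"]) / env["MODEL"]
if not model_file.is_file():
    raise SystemExit(f"Model not found: {model_file}")
print(f"OK: {model_file} ({model_file.stat().st_size / 1e6:.1f} MB)")
```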

README.md

+14 −6

````diff
@@ -23,18 +23,26 @@ git clone https://github.com/AstraBert/everything-ai.git
 cd everything-ai
 ```
 ### 2. Set your `.env` file
-Modify the `VOLUME` variable in the .env file so that you can mount your local file system into Docker container.
+Modify:
+- the `VOLUME` variable in the .env file, so that you can mount your local file system into the Docker container.
+- the `MODELS_PATH` variable in the .env file, so that you can tell llama.cpp where you stored the GGUF models you downloaded.
+- the `MODEL` variable in the .env file, so that you can tell llama.cpp which model to use (use the actual name of the gguf file, and do not forget the .gguf extension!).
+- the `MAX_TOKENS` variable in the .env file, so that you can tell llama.cpp how many new tokens it can generate as output.
 
-An example could be:
+An example of a `.env` file could be:
 ```bash
 VOLUME="c:/Users/User/:/User/"
+MODELS_PATH="c:/Users/User/.cache/llama.cpp/"
+MODEL="stories260K.gguf"
+MAX_TOKENS="512"
 ```
-This means that now everything that is under "c:/Users/User/" on your local machine is under "/User/" in your Docker container.
+This means that everything under "c:/Users/User/" on your local machine is now under "/User/" in your Docker container, and that llama.cpp knows where to look for models, which model to load, and the maximum number of new tokens to generate.
 
 ### 3. Pull the necessary images
 ```bash
-docker pull astrabert/everything-ai
-docker pull qdrant/qdrant
+docker pull astrabert/everything-ai:latest
+docker pull qdrant/qdrant:latest
+docker pull ghcr.io/ggerganov/llama.cpp:server
 ```
 ### 4. Run the multi-container app
 ```bash
@@ -63,6 +71,7 @@ Choose the task among:
 - *protein-folding*: get the 3D structure of a protein from its amino-acid sequence, using the ESM-2 backbone model - **GPU ONLY**
 - *autotrain*: fine-tune a model on a specific downstream task with autotrain-advanced, just by specifying your HF username, HF writing token and the path to a yaml config file for the training
 - *spaces-api-supabase*: use HF Spaces API in combination with Supabase PostgreSQL databases in order to unleash more powerful LLMs and larger RAG-oriented vector databases - **MULTILINGUAL**
+- *llama.cpp-and-qdrant*: same as *retrieval-text-generation*, but uses **llama.cpp** as the inference engine, so you MUST NOT specify a model - **MULTILINGUAL**
 - *image-retrieval-search*: search an image database by uploading a folder as database input. The folder should have the following structure:
 
 ```
@@ -87,4 +96,3 @@ Once everything is ready, you can head over to `localhost:7860` and start using
 </div>
 
 
-## Complete documentation is coming soon...🚀
````
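Before heading to `localhost:7860`, it can help to confirm all three services are actually reachable. A minimal sketch (not from the repo; ports taken from `compose.yaml` and the README, and assuming llama.cpp's server exposes its usual `/health` route):

```python
# smoke_test.py - hypothetical host-side check, not part of this commit.
import requests

checks = {
    "qdrant": "http://localhost:6333/collections",  # Qdrant REST API
    "llama.cpp": "http://localhost:8000/health",    # llama.cpp server health probe
    "everything-ai": "http://localhost:7860",       # Gradio task selector
}

for name, url in checks.items():
    try:
        code = requests.get(url, timeout=5).status_code
        print(f"{name}: HTTP {code}")
    except requests.exceptions.ConnectionError:
        print(f"{name}: unreachable - is the container up?")
```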

compose.yaml

+12 −2

```diff
@@ -6,7 +6,7 @@ services:
   everything-ai:
     image: astrabert/everything-ai
     volumes:
-      - ${VOLUME}
+      - $VOLUME
     networks:
       - mynet
     ports:
@@ -19,4 +19,14 @@ services:
     volumes:
       - "./qdrant_storage:/qdrant/storage"
     networks:
-      - mynet
+      - mynet
+  llama_server:
+    image: ghcr.io/ggerganov/llama.cpp:server
+    ports:
+      - "8000:8000"
+    volumes:
+      - "$MODELS_PATH:/models"
+    networks:
+      - mynet
+    command: "-m /models/$MODEL --port 8000 --host 0.0.0.0 -n $MAX_TOKENS"
+
```
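The new `llama_server` service publishes llama.cpp's HTTP server on host port 8000; `llama_cpp_int.py` (below) talks to its `/completion` endpoint from inside the compose network. The same request from the host, as a sketch mirroring the payload the script sends:

```python
# Hypothetical host-side call to the llama_server service started by compose;
# endpoint and payload mirror llama_cpp_respond() in docker/llama_cpp_int.py.
import requests

response = requests.post(
    "http://localhost:8000/completion",  # "8000:8000" mapping from compose.yaml
    headers={"Content-Type": "application/json"},
    json={"prompt": "What are penguins?", "n_predict": 128},
    timeout=120,
)
print(response.json()["content"])  # the generated continuation
```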

docker/Dockerfile

−3

```diff
@@ -7,9 +7,6 @@ WORKDIR /app
 # Add the current directory contents into the container at /app
 ADD . /app
 
-#Upgrade gradio
-RUN pip install gradio_molecule3d
-
 # Expose the port that the application will run on
 EXPOSE 8760
```

docker/llama_cpp_int.py

+131 (new file)

```python
from argparse import ArgumentParser
import os

import gradio as gr
import requests
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

from utils import Translation, PDFdatabase, NeuralSearcher

argparse = ArgumentParser()

argparse.add_argument(
    "-pf",
    "--pdf_file",
    help="Single pdf file or N pdfs reported like this: /path/to/file1.pdf,/path/to/file2.pdf,...,/path/to/fileN.pdf (there is no strict naming, you just need to provide them comma-separated)",
    required=False,
    default="No file"
)

argparse.add_argument(
    "-d",
    "--directory",
    help="Directory where all your pdfs of interest are stored",
    required=False,
    default="No directory"
)

argparse.add_argument(
    "-l",
    "--language",
    help="Language of the written content contained in the pdfs",
    required=False,
    default="Same as query"
)

args = argparse.parse_args()

pdff = args.pdf_file
dirs = args.directory
lan = args.language

# Normalize Windows-style paths and stray quotes, then build the list of pdfs
# either from the comma-separated file argument or from the given directory.
if pdff.replace("\\", "").replace("'", "") != "None" and dirs.replace("\\", "").replace("'", "") == "No directory":
    pdfs = pdff.replace("\\", "/").replace("'", "").split(",")
else:
    pdfs = [os.path.join(dirs.replace("\\", "/").replace("'", ""), f) for f in os.listdir(dirs.replace("\\", "/").replace("'", "")) if f.endswith(".pdf")]

client = QdrantClient(host="host.docker.internal", port="6333")
encoder = SentenceTransformer("all-MiniLM-L6-v2")

# Chunk, embed and upload the pdfs to the Qdrant container.
pdfdb = PDFdatabase(pdfs, encoder, client)
pdfdb.preprocess()
pdfdb.collect_data()
pdfdb.qdrant_collection_and_upload()


def llama_cpp_respond(query, max_new_tokens):
    # The model itself runs in the llama_server container defined in compose.yaml.
    url = "http://host.docker.internal:8000/completion"
    headers = {
        "Content-Type": "application/json"
    }
    data = {
        "prompt": query,
        "n_predict": int(max_new_tokens)
    }

    response = requests.post(url, headers=headers, json=data)

    a = response.json()
    return a["content"]


def reply(max_new_tokens, message):
    global pdfdb
    txt = Translation(message, "en")
    if txt.original == "en" and lan.replace("\\", "").replace("'", "") == "None":
        # English query over English documents: no translation needed.
        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
        results = txt2txt.search(message)
        response = llama_cpp_respond(results[0]["text"], max_new_tokens)
        return response
    elif txt.original == "en" and lan.replace("\\", "").replace("'", "") != "None":
        # English query over non-English documents: translate the query into
        # the document language, then the retrieved chunk back to English.
        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
        transl = Translation(message, lan.replace("\\", "").replace("'", ""))
        message = transl.translatef()
        results = txt2txt.search(message)
        t = Translation(results[0]["text"], txt.original)
        res = t.translatef()
        response = llama_cpp_respond(res, max_new_tokens)
        return response
    elif txt.original != "en" and lan.replace("\\", "").replace("'", "") == "None":
        # Non-English query over English documents: translate the retrieved
        # chunk into English for generation, then the answer back.
        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
        results = txt2txt.search(message)
        transl = Translation(results[0]["text"], "en")
        translation = transl.translatef()
        response = llama_cpp_respond(translation, max_new_tokens)
        t = Translation(response, txt.original)
        res = t.translatef()
        return res
    else:
        # Non-English query over non-English documents: translate on the way
        # in and on the way out.
        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
        transl = Translation(message, lan.replace("\\", "").replace("'", ""))
        message = transl.translatef()
        results = txt2txt.search(message)
        t = Translation(results[0]["text"], txt.original)
        res = t.translatef()
        response = llama_cpp_respond(res, max_new_tokens)
        tr = Translation(response, txt.original)
        ress = tr.translatef()
        return ress


demo = gr.Interface(
    reply,
    [
        gr.Textbox(
            label="Max new tokens",
            info="The number reported should not be higher than the one specified within the .env file",
            lines=3,
            value="512",
        ),
        gr.Textbox(
            label="Input query",
            info="Write your input query here",
            lines=3,
            value="What are penguins?",
        )
    ],
    title="everything-ai-llamacpp",
    outputs="textbox"
)
demo.launch(server_name="0.0.0.0", share=False)
```
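The four branches of `reply()` differ only in where translation happens around the Qdrant search and the llama.cpp call. A compact restatement (hypothetical sketch, not part of the commit; `search`, `llm` and `translate` stand in for the `NeuralSearcher`, `llama_cpp_respond` and `utils.Translation(...).translatef()` calls):

```python
# Hypothetical condensation of reply()'s branching logic.
def route(message, query_lang, doc_lang, search, llm, translate):
    if query_lang == "en" and doc_lang == "None":
        return llm(search(message))                        # all-English fast path
    if query_lang == "en":
        hit = search(translate(message, to=doc_lang))      # query -> doc language
        return llm(translate(hit, to="en"))                # chunk -> English
    if doc_lang == "None":
        answer = llm(translate(search(message), to="en"))  # chunk -> English
        return translate(answer, to=query_lang)            # answer -> query language
    hit = translate(search(translate(message, to=doc_lang)), to=query_lang)
    return translate(llm(hit), to=query_lang)              # answer -> query language
```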

docker/retrieval_text_generation.py

+3 −3

```diff
@@ -48,10 +48,10 @@
 lan = args.language
 
 
-if pdff.replace("\\","").replace("'","") != "None" and dirs.replace("\\","").replace("'","") == "None":
-    pdfs = pdff.replace("\\","").replace("'","").split(",")
+if pdff.replace("\\","").replace("'","") != "None" and dirs.replace("\\","").replace("'","") == "No directory":
+    pdfs = pdff.replace("\\","/").replace("'","").split(",")
 else:
-    pdfs = [os.path.join(dirs.replace("\\","").replace("'",""), f) for f in os.listdir(dirs.replace("\\","").replace("'","")) if f.endswith(".pdf")]
+    pdfs = [os.path.join(dirs.replace("\\","/").replace("'",""), f) for f in os.listdir(dirs.replace("\\","/").replace("'","")) if f.endswith(".pdf")]
 
 client = QdrantClient(host="host.docker.internal", port="6333")
 encoder = SentenceTransformer("all-MiniLM-L6-v2")
```
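The substantive change here is path handling: backslashes are now converted to forward slashes instead of being stripped, and the directory sentinel matches the new "No directory" default set in `select_and_run.py`. The normalization in isolation:

```python
# Minimal demonstration of the normalization above: backslashes become forward
# slashes and stray quotes are removed before the comma split.
raw = r"'c:\Users\User\paper1.pdf','c:\Users\User\paper2.pdf'"
pdfs = raw.replace("\\", "/").replace("'", "").split(",")
print(pdfs)  # ['c:/Users/User/paper1.pdf', 'c:/Users/User/paper2.pdf']
```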

docker/select_and_run.py

+9 −6

```diff
@@ -1,16 +1,19 @@
 import subprocess as sp
 import gradio as gr
 
-TASK_TO_SCRIPT = {"retrieval-text-generation": "retrieval_text_generation.py", "agnostic-text-generation": "agnostic_text_generation.py", "text-summarization": "text_summarization.py", "image-generation": "image_generation.py", "image-generation-pollinations": "image_generation_pollinations.py", "image-classification": "image_classification.py", "image-to-text": "image_to_text.py", "retrieval-image-search": "retrieval_image_search.py", "protein-folding": "protein_folding_with_esm.py", "video-generation": "video_generation.py", "speech-recognition": "speech_recognition.py", "spaces-api-supabase": "spaces_api_supabase.py", "audio-classification": "audio_classification.py", "autotrain": "autotrain_interface.py"}
+TASK_TO_SCRIPT = {"retrieval-text-generation": "retrieval_text_generation.py", "agnostic-text-generation": "agnostic_text_generation.py", "text-summarization": "text_summarization.py", "image-generation": "image_generation.py", "image-generation-pollinations": "image_generation_pollinations.py", "image-classification": "image_classification.py", "image-to-text": "image_to_text.py", "retrieval-image-search": "retrieval_image_search.py", "protein-folding": "protein_folding_with_esm.py", "video-generation": "video_generation.py", "speech-recognition": "speech_recognition.py", "spaces-api-supabase": "spaces_api_supabase.py", "audio-classification": "audio_classification.py", "autotrain": "autotrain_interface.py", "llama.cpp-and-qdrant": "llama_cpp_int.py"}
 
 
 def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None", imdim="512", gradioclient="None", supabaseurl="None", collectname="None", supenc="all-MiniLM-L6-v2", supdim="384"):
-    if tsk != "retrieval-text-generation" and tsk != "image-generation-pollinations" and tsk != "retrieval-image-search" and tsk != "autotrain" and tsk != "protein-folding" and tsk != "spaces-api-supabase":
+    if tsk != "retrieval-text-generation" and tsk != "image-generation-pollinations" and tsk != "retrieval-image-search" and tsk != "autotrain" and tsk != "protein-folding" and tsk != "spaces-api-supabase" and tsk != "llama.cpp-and-qdrant":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod}", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod}"
     elif tsk == "retrieval-text-generation":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod} -pf '{pdff}' -d '{dirs}' -l '{lan}'", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod} -pf '{pdff}' -d '{dirs}' -l '{lan}'"
+    elif tsk == "llama.cpp-and-qdrant":
+        sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -pf '{pdff}' -d '{dirs}' -l '{lan}'", shell=True)
+        return f"python3 {TASK_TO_SCRIPT[tsk]} -pf '{pdff}' -d '{dirs}' -l '{lan}'"
     elif tsk == "image-generation-pollinations" or tsk == "autotrain" or tsk == "protein-folding":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]}", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]}"
@@ -41,15 +44,15 @@ def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None", imdim="
         ),
         gr.Textbox(
             label="PDF file(s)",
-            info="Single pdf file or N pdfs reported like this: /path/to/file1.pdf,/path/to/file2.pdf,...,/path/to/fileN.pdf (there is no strict naming, you just need to provide them comma-separated): only available with 'retrieval-text-generation'",
+            info="Single pdf file or N pdfs reported like this: /path/to/file1.pdf,/path/to/file2.pdf,...,/path/to/fileN.pdf (there is no strict naming, you just need to provide them comma-separated), please do not use '\\' as path separators: only available with 'retrieval-text-generation'",
             lines=3,
-            value="None",
+            value="No file",
         ),
         gr.Textbox(
             label="Directory",
-            info="Directory where all your pdfs or images (.jpg, .jpeg, .png) of interest are stored (only available with 'retrieval-text-generation' for pdfs and 'retrieval-image-search' for images)",
+            info="Directory where all your pdfs or images (.jpg, .jpeg, .png) of interest are stored (only available with 'retrieval-text-generation' for pdfs and 'retrieval-image-search' for images). Please do not use '\\' as path separators",
             lines=3,
-            value="None",
+            value="No directory",
         ),
         gr.Textbox(
             label="Language",
```
