Commit ea01ef0: "new features"
1 parent 5b69aa5 · commit ea01ef0

12 files changed: +192 −14 lines changed

.gitignore (+2 −2)

@@ -1,4 +1,4 @@
 flagged/
-scripts/__pycache__
 docker/__pycache__
-docker/flagged
+docker/flagged
+qdrant_storage/

README.md (+12 −0)

@@ -57,6 +57,18 @@ Choose the task among:
 - *image-generation-pollinations*: stable diffusion, use Pollinations AI API; if you choose 'image-generation-pollinations', you do not need to specify anything else apart from the task - **MULTILINGUAL**
 - *image-classification*: classify an image, supports every image-classification model on HF Hub - **ENGLISH ONLY**
 - *image-to-text*: describe an image, supports every image-to-text model on HF Hub - **ENGLISH ONLY**
+- *retrieval-image-search*: search an image database by uploading a folder as database input. The folder should have the following structure:
+
+```
+./
+├── test/
+│   ├── label1/
+│   └── label2/
+└── train/
+    ├── label1/
+    └── label2/
+```
+You can query the database starting from your own pictures.
 
 ### 6. Go to `localhost:7860` and start using your assistant
 
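The train/ and test/ label subfolders follow the layout that Hugging Face `datasets`' "imagefolder" builder expects, which is how `ImageDB.create_dataset` (in `docker/utils.py` below) ingests the directory. A minimal sketch of what that loader yields, assuming a hypothetical `./animals/` folder with the structure above:

```python
from datasets import load_dataset

# "imagefolder" infers one class per subdirectory (label1, label2, ...)
# and exposes the pictures under an "image" column.
dataset = load_dataset("imagefolder", data_dir="./animals/", split="train")

print(dataset.features)     # {"image": Image(...), "label": ClassLabel(...)}
print(dataset[0]["label"])  # integer class id derived from the folder name
```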

docker/Dockerfile (+4 −1)

@@ -7,7 +7,10 @@ WORKDIR /app
 # Add the current directory contents into the container at /app
 ADD . /app
 
+# Add new package
+RUN python3 -m pip install datasets==2.15.0
+
 # Expose the port that the application will run on
-EXPOSE 7860
+EXPOSE 8760
 
 ENTRYPOINT [ "python3", "select_and_run.py" ]

docker/image_classification.py (+4 −1)

@@ -1,6 +1,7 @@
 from transformers import AutoModelForImageClassification, AutoImageProcessor, pipeline
 from PIL import Image
 from argparse import ArgumentParser
+import torch
 
 argparse = ArgumentParser()
 argparse.add_argument(
@@ -18,9 +19,11 @@
 
 model_checkpoint = mod
 
-model = AutoModelForImageClassification.from_pretrained(model_checkpoint)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = AutoModelForImageClassification.from_pretrained(model_checkpoint).to(device)
 processor = AutoImageProcessor.from_pretrained(model_checkpoint)
 
+
 pipe = pipeline("image-classification", model=model, image_processor=processor)
 
 def get_results(image, ppln=pipe):
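Two styles of GPU opt-in appear in this commit: this hunk moves the instantiated model with `.to(device)` before wrapping it in a pipeline, while the `image_to_text.py` and `text_summarization.py` hunks below pass `device=device` to `pipeline` directly. A minimal sketch of both, assuming an illustrative checkpoint that is not from this commit:

```python
import torch
from transformers import AutoModelForImageClassification, AutoImageProcessor, pipeline

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint = "microsoft/resnet-50"  # illustrative image-classification checkpoint

# Style 1 (this file): place the model yourself, then hand it to the pipeline.
model = AutoModelForImageClassification.from_pretrained(checkpoint).to(device)
processor = AutoImageProcessor.from_pretrained(checkpoint)
pipe = pipeline("image-classification", model=model, image_processor=processor)

# Style 2 (image_to_text.py, text_summarization.py): let the pipeline place it.
pipe = pipeline("image-classification", model=checkpoint, device=device)
```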

docker/image_to_text.py (+2 −1)

@@ -19,7 +19,8 @@
 
 model_checkpoint = mod
 
-pipe = pipeline("image-to-text", model=model_checkpoint)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+pipe = pipeline("image-to-text", model=model_checkpoint, device=device)
 
 def get_results(image, ppln=pipe):
     img = Image.fromarray(image)

docker/requirements.txt (+2 −1)

@@ -11,4 +11,5 @@ diffusers==0.27.2
 pydantic==2.6.4
 qdrant_client==1.9.0
 pillow==10.2.0
-accelerate
+datasets==2.15.0
+accelerate

docker/retrieval_image_search.py (+74 −0)

@@ -0,0 +1,74 @@
+from transformers import AutoImageProcessor, AutoModel
+from utils import ImageDB
+from PIL import Image
+from qdrant_client import QdrantClient
+import gradio as gr
+from argparse import ArgumentParser
+import torch
+
+argparse = ArgumentParser()
+argparse.add_argument(
+    "-m",
+    "--model",
+    help="HuggingFace Model identifier, such as 'facebook/dinov2-base'",
+    required=True,
+)
+
+argparse.add_argument(
+    "-id",
+    "--image_dimension",
+    help="Dimension of the image embedding (e.g. 512, 768, 384...)",
+    required=False,
+    default=512,
+    type=int
+)
+
+argparse.add_argument(
+    "-d",
+    "--directory",
+    help="Directory where all your images of interest are stored",
+    required=False,
+    default="No directory"
+)
+
+
+args = argparse.parse_args()
+
+
+mod = args.model
+dirs = args.directory
+imd = args.image_dimension
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+processor = AutoImageProcessor.from_pretrained(mod)
+model = AutoModel.from_pretrained(mod).to(device)
+
+client = QdrantClient(host="host.docker.internal", port=6333)
+imdb = ImageDB(dirs, processor, model, client, imd)
+print(imdb.collection_name)
+imdb.create_dataset()
+imdb.to_collection()
+
+
+def see_images(dataset, results):
+    images = []
+    for i in range(len(results)):
+        img = dataset[results[i].id]['image']
+        images.append(img)
+    return images
+
+def process_img(image):
+    global imdb
+    results = imdb.searchDB(Image.fromarray(image))
+    images = see_images(imdb.dataset, results)
+    return images
+
+
+iface = gr.Interface(
+    title="everything-ai-retrievalimg",
+    fn=process_img,
+    inputs=gr.Image(label="Input Image"),
+    outputs=gr.Gallery(label="Matching Images"),
+)
+
+iface.launch(server_name="0.0.0.0", share=False)
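`see_images` works because `ImageDB.to_collection` (in `docker/utils.py` below) assigns `ids = list(range(self.dataset.num_rows))` when upserting, so each Qdrant hit's `id` doubles as a row index into the `datasets` dataset. A hedged sketch of that round trip; the collection name and variables are illustrative:

```python
# client.search returns qdrant_client ScoredPoint objects,
# ordered by cosine similarity (best match first).
results = client.search(
    collection_name="animals_ImagesCollection",  # illustrative name
    query_vector=query_embedding,
    limit=4,
)
for point in results:
    # point.id was the dataset row number at upsert time,
    # so it indexes straight back into the imagefolder dataset.
    print(point.id, point.score, dataset[point.id]["label"])
```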

docker/retrieval_text_generation.py (+3 −2)

@@ -4,7 +4,7 @@
 from sentence_transformers import SentenceTransformer
 from argparse import ArgumentParser
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import sys
+import torch
 import os
 
 argparse = ArgumentParser()
@@ -61,7 +61,8 @@
 pdfdb.collect_data()
 pdfdb.qdrant_collection_and_upload()
 
-model = AutoModelForCausalLM.from_pretrained(mod)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = AutoModelForCausalLM.from_pretrained(mod).to(device)
 tokenizer = AutoTokenizer.from_pretrained(mod)
 
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=2048, repetition_penalty=1.2, temperature=0.4)

docker/select_and_run.py (+14 −5)

@@ -1,19 +1,22 @@
 import subprocess as sp
 import gradio as gr
 
-TASK_TO_SCRIPT = {"retrieval-text-generation": "retrieval_text_generation.py", "agnostic-text-generation": "agnostic_text_generation.py", "text-summarization": "text_summarization.py", "image-generation": "image_generation.py", "image-generation-pollinations": "image_generation_pollinations.py", "image-classification": "image_classification.py", "image-to-text": "image_to_text.py"}
+TASK_TO_SCRIPT = {"retrieval-text-generation": "retrieval_text_generation.py", "agnostic-text-generation": "agnostic_text_generation.py", "text-summarization": "text_summarization.py", "image-generation": "image_generation.py", "image-generation-pollinations": "image_generation_pollinations.py", "image-classification": "image_classification.py", "image-to-text": "image_to_text.py", "retrieval-image-search": "retrieval_image_search.py"}
 
 
-def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None"):
-    if tsk != "retrieval-text-generation" and tsk != "image-generation-pollinations":
+def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None", imdim="512"):
+    if tsk != "retrieval-text-generation" and tsk != "image-generation-pollinations" and tsk != "retrieval-image-search":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod}", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod}"
     elif tsk == "retrieval-text-generation":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod} -pf '{pdff}' -d '{dirs}' -l '{lan}'", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod} -pf '{pdff}' -d '{dirs}' -l '{lan}'"
-    else:
+    elif tsk == "image-generation-pollinations":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]}", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]}"
+    else:
+        sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -d {dirs} -id {imdim} -m {mod}", shell=True)
+        return f"python3 {TASK_TO_SCRIPT[tsk]} -d {dirs} -id {imdim} -m {mod}"
 
 demo = gr.Interface(
     build_command,
@@ -38,7 +41,7 @@ def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None"):
         ),
         gr.Textbox(
             label="Directory",
-            info="Directory where all your pdfs of interest are stored (only available with 'retrieval-text-generation')",
+            info="Directory where all your pdfs or images (.jpg, .jpeg, .png) of interest are stored (only available with 'retrieval-text-generation' for pdfs and 'retrieval-image-search' for images)",
             lines=3,
             value="None",
         ),
@@ -48,6 +51,12 @@ def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None"):
             lines=3,
             value="None",
         ),
+        gr.Textbox(
+            label="Image dimension",
+            info="Dimension of the image (this is generally model and/or task-dependent!)",
+            lines=3,
+            value="e.g.: 512, 384, 768...",
+        ),
     ],
     outputs="textbox",
     theme=gr.themes.Base()
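For the new task, the `else` branch shells out with `-d`, `-id` and `-m`, so the Gradio textbox values travel unmodified into `retrieval_image_search.py`'s argparse flags. A hedged usage sketch (the model id and directory are illustrative; note that `build_command` also launches the script via `sp.run` as a side effect):

```python
cmd = build_command(
    tsk="retrieval-image-search",
    mod="facebook/dinov2-base",  # illustrative vision checkpoint
    dirs="./animals",            # folder with train/<label>/ subfolders
    imdim="768",                 # must match the checkpoint's embedding width
)
print(cmd)
# python3 retrieval_image_search.py -d ./animals -id 768 -m facebook/dinov2-base
```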

docker/text_summarization.py (+3 −1)

@@ -5,6 +5,7 @@
 from utils import merge_pdfs
 import gradio as gr
 import time
+import torch
 
 histr = [[None, "Hi, I'm **everything-ai-summarization**🤖.\nI'm here to assist you and let you summarize _your_ texts and _your_ pdfs!\nCheck [my website](https://astrabert.github.io/everything-ai/) for troubleshooting and documentation reference\nHave fun!😊"]]
 
@@ -24,7 +25,8 @@
 
 model_checkpoint = mod
 
-summarizer = pipeline("summarization", model=model_checkpoint)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+summarizer = pipeline("summarization", model=model_checkpoint, device=device)
 
 def convert_none_to_str(l: list):
     newlist = []

docker/utils.py (+72 −0)

@@ -6,6 +6,10 @@
 from langchain.text_splitter import CharacterTextSplitter
 from langchain_community.document_loaders import PyPDFLoader
 import os
+from datasets import load_dataset, Dataset
+import torch
+import numpy as np
+
 
 def remove_items(test_list, item):
     res = [i for i in test_list if i != item]
@@ -92,3 +96,71 @@ def translatef(self):
     translation = translator.translate(self.text)
     return translation
 
+class ImageDB:
+    def __init__(self, imagesdir, processor, model, client, dimension):
+        self.imagesdir = imagesdir
+        self.processor = processor
+        self.model = model
+        self.client = client
+        self.dimension = dimension
+        if os.path.basename(self.imagesdir) != "":
+            self.collection_name = os.path.basename(self.imagesdir)+"_ImagesCollection"
+        else:
+            if "\\" in self.imagesdir:
+                self.collection_name = self.imagesdir.split("\\")[-2]+"_ImagesCollection"
+            else:
+                self.collection_name = self.imagesdir.split("/")[-2]+"_ImagesCollection"
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.client.recreate_collection(
+            collection_name=self.collection_name,
+            vectors_config=models.VectorParams(size=self.dimension, distance=models.Distance.COSINE)
+        )
+    def get_embeddings(self, batch):
+        inputs = self.processor(images=batch['image'], return_tensors="pt").to(self.device)
+        with torch.no_grad():
+            outputs = self.model(**inputs).last_hidden_state.mean(dim=1).cpu().numpy()
+        batch['embeddings'] = outputs
+        return batch
+    def create_dataset(self):
+        self.dataset = load_dataset("imagefolder", data_dir=self.imagesdir, split="train")
+        self.dataset = self.dataset.map(self.get_embeddings, batched=True, batch_size=16)
+    def to_collection(self):
+        np.save(os.path.join(self.imagesdir, "vectors"), np.array(self.dataset['embeddings']), allow_pickle=False)
+
+        payload = self.dataset.select_columns([
+            "label"
+        ]).to_pandas().fillna(0).to_dict(orient="records")
+
+        ids = list(range(self.dataset.num_rows))
+        embeddings = np.load(os.path.join(self.imagesdir, "vectors.npy")).tolist()
+
+        batch_size = 1000
+
+        for i in range(0, self.dataset.num_rows, batch_size):
+
+            low_idx = min(i+batch_size, self.dataset.num_rows)
+
+            batch_of_ids = ids[i: low_idx]
+            batch_of_embs = embeddings[i: low_idx]
+            batch_of_payloads = payload[i: low_idx]
+
+            self.client.upsert(
+                collection_name=self.collection_name,
+                points=models.Batch(
+                    ids=batch_of_ids,
+                    vectors=batch_of_embs,
+                    payloads=batch_of_payloads
+                )
+            )
+    def searchDB(self, image):
+        dtst = {"image": [image], "label": ["None"]}
+        dtst = Dataset.from_dict(dtst)
+        dtst = dtst.map(self.get_embeddings, batched=True, batch_size=1)
+        img = dtst[0]
+        results = self.client.search(
+            collection_name=self.collection_name,
+            query_vector=img['embeddings'],
+            limit=4
+        )
+        return results
+
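`ImageDB.get_embeddings` mean-pools `last_hidden_state` over the sequence dimension, so the vectors it upserts have the backbone's hidden width, and the `dimension` argument (the `-id` flag upstream) must match it, or the cosine-distance collection created by `recreate_collection` cannot accept the points. A quick way to look the value up, assuming any `AutoModel`-loadable vision checkpoint:

```python
from transformers import AutoModel

model = AutoModel.from_pretrained("facebook/dinov2-base")  # illustrative checkpoint
print(model.config.hidden_size)  # e.g. 768; pass this as the -id / dimension value
```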

imgs/everything-ai.drawio.png (binary file, 41.4 KB)
