-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
102 lines (81 loc) · 4 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
import pathlib
import torch
from PIL import Image
from label_studio_ml.model import LabelStudioMLBase
from transformers import OwlViTProcessor, OwlViTForObjectDetection
CLIP_CLASSES = ["a photo of a cow", "a photo of a chicken"]
CLIP_CLASSES_MAP = {"a photo of a cow": "Cow", "a photo of a chicken": "Chicken"}
SCORE_THRESHOLD = 0.3
LABEL_STUDIO_DATA_PATH = "/home/pavtiger/Docs/label-studio/mydata/media"
class DummyModel(LabelStudioMLBase):
def __init__(self, **kwargs):
super(DummyModel, self).__init__(**kwargs)
from_name, schema = list(self.parsed_label_config.items())[0]
self.from_name = from_name
self.to_name = schema['to_name'][0]
self.labels = schema['labels']
def predict_clip(self, image):
results = []
x_shape, y_shape = image.size
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
texts = [CLIP_CLASSES]
inputs = processor(text=texts, images=image, return_tensors="pt")
outputs = model(**inputs)
# Target image sizes (height, width) to rescale box predictions [batch_size, 2]
target_sizes = torch.Tensor([image.size[::-1]])
# Convert outputs (bounding boxes and class logits) to COCO API
model_results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes)
i = 0 # Retrieve predictions for the first image for the corresponding text queries
text = texts[i]
boxes, scores, labels = model_results[i]["boxes"].detach().cpu().numpy(), model_results[i]["scores"].detach().cpu().numpy(), model_results[i]["labels"].detach().cpu().numpy()
for box, score, label in zip(boxes, scores, labels):
box = [round(i, 2) for i in box.tolist()]
if score >= SCORE_THRESHOLD:
print(f"Detected {text[label]} with confidence {round(score.item(), 3)} at location {box}")
x_delta, y_delta = box[2] - box[0], box[3] - box[1]
result = {
'from_name': self.from_name,
'to_name': self.to_name,
"original_width": x_shape,
"original_height": y_shape,
"image_rotation": 0,
'type': 'rectanglelabels',
'value': {
'rectanglelabels': [CLIP_CLASSES_MAP[text[label]]],
'x': (box[0] / x_shape) * 100,
'y': (box[1] / y_shape) * 100,
'width': (x_delta / x_shape) * 100,
'height': (y_delta / y_shape) * 100
},
'score': round(score.item(), 3),
'model_version': 'test123'
}
results.append(result)
return results
def predict(self, tasks, **kwargs):
""" This is where inference happens:
model returns the list of predictions based on input list of tasks
:param tasks: Label Studio tasks in JSON format
"""
results = []
print(len(tasks))
for i, task in enumerate(tasks):
print(task['data']['image'])
image_path = pathlib.Path(task['data']['image'])
new_path = pathlib.Path(*image_path.parts[2:])
img = Image.open(os.path.join(LABEL_STUDIO_DATA_PATH, new_path))
y = self.predict_clip(img)
results.append({
'result': y,
'score': float(0.4)
})
return results
def fit(self, completions, workdir=None, **kwargs):
""" This is where training happens: train your model given list of completions,
then returns dict with created links and resources
:param completions: aka annotations, the labeling results from Label Studio
:param workdir: current working directory for ML backend
"""
return NotImplementedError