Skip to content

Commit 5b12401

Browse files
committed
predict: fix lpr deskew bugs
1 parent d2f1c69 commit 5b12401

File tree

10 files changed

+73
-27
lines changed

10 files changed

+73
-27
lines changed

plugins/coreml/package-lock.json

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

plugins/coreml/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,5 @@
4242
"devDependencies": {
4343
"@scrypted/sdk": "file:../../sdk"
4444
},
45-
"version": "0.1.58"
45+
"version": "0.1.59"
4646
}

plugins/onnx/package-lock.json

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

plugins/onnx/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,5 @@
4242
"devDependencies": {
4343
"@scrypted/sdk": "file:../../sdk"
4444
},
45-
"version": "0.1.96"
45+
"version": "0.1.97"
4646
}

plugins/openvino/package-lock.json

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

plugins/openvino/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,5 @@
4242
"devDependencies": {
4343
"@scrypted/sdk": "file:../../sdk"
4444
},
45-
"version": "0.1.97"
45+
"version": "0.1.98"
4646
}

plugins/openvino/src/common/text.py

+17-7
Original file line numberDiff line numberDiff line change
@@ -49,27 +49,37 @@ def calculate_y_change(original_height, skew_angle_radians):
4949

5050
return y_change
5151

52-
async def prepare_text_result(d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float):
52+
async def prepare_text_result(d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float, deskew_height: float):
5353
textImage = await crop_text(d, image)
5454

5555
skew_height_change = calculate_y_change(d["boundingBox"][3], skew_angle)
5656
skew_height_change = math.floor(skew_height_change)
5757
textImage = skew_image(textImage, skew_angle)
5858
# crop skew_height_change from top
5959
if skew_height_change > 0:
60-
textImage = textImage.crop((0, 0, textImage.width, textImage.height - skew_height_change))
60+
textImage = textImage.crop((0, 0, textImage.width, deskew_height))
6161
elif skew_height_change < 0:
62-
textImage = textImage.crop((0, -skew_height_change, textImage.width, textImage.height))
62+
textImage = textImage.crop((0, textImage.height - deskew_height, textImage.width, textImage.height))
6363

64-
new_height = 64
64+
target_height = 64
65+
height_padding = 3
66+
new_height = target_height - height_padding * 2
6567
new_width = int(textImage.width * new_height / textImage.height)
6668
textImage = textImage.resize((new_width, new_height), resample=Image.LANCZOS).convert("L")
6769

6870
new_width = 256
71+
# sample the pixel at the vertical midpoint of the left edge
72+
edge_color = textImage.getpixel((0, textImage.height // 2))
73+
# add the pixel at the vertical midpoint of the right edge
74+
edge_color += textImage.getpixel((textImage.width - 1, textImage.height // 2))
75+
# add the pixel at the horizontal midpoint of the top edge
76+
edge_color += textImage.getpixel((textImage.width // 2, 0))
77+
# add the pixel at the horizontal midpoint of the bottom edge
78+
edge_color += textImage.getpixel((textImage.width // 2, textImage.height - 1))
79+
edge_color = edge_color // 4
80+
6981
# calculate padding dimensions
70-
padding = (0, 0, new_width - textImage.width, 0)
71-
# todo: clamp entire edge rather than just center
72-
edge_color = textImage.getpixel((textImage.width - 1, textImage.height // 2))
82+
padding = (0, height_padding, new_width - textImage.width, height_padding)
7383
# pad image
7484
textImage = ImageOps.expand(textImage, padding, fill=edge_color)
7585
# pil to numpy

plugins/openvino/src/predict/face_recognize.py

+24
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@
1515
from common import yolo
1616
from predict import PredictPlugin
1717

18+
def cosine_similarity(vector_a, vector_b):
    """Return the cosine similarity of two vectors.

    The result is the dot product of the inputs divided by the product of
    their Euclidean norms.

    Args:
        vector_a: First vector (anything accepted by ``np.dot`` and
            ``np.linalg.norm``).
        vector_b: Second vector, same length as ``vector_a``.

    Returns:
        The cosine similarity, or ``0.0`` when either vector has zero norm
        (the similarity is undefined there; returning 0.0 avoids a
        division by zero that would otherwise produce NaN and a
        RuntimeWarning).
    """
    norm_product = np.linalg.norm(vector_a) * np.linalg.norm(vector_b)
    # A zero-length vector has no direction, so treat it as "not similar"
    # rather than dividing by zero.
    if norm_product == 0:
        return 0.0
    return np.dot(vector_a, vector_b) / norm_product
1824

1925
class FaceRecognizeDetection(PredictPlugin):
2026
def __init__(self, nativeId: str | None = None):
@@ -153,4 +159,22 @@ async def run_detection_image(
153159
if len(futures):
154160
await asyncio.wait(futures)
155161

162+
# last = None
163+
# for d in ret['detections']:
164+
# if d["className"] != "face":
165+
# continue
166+
# check = d.get("embedding")
167+
# if check is None:
168+
# continue
169+
# # decode base64 string check
170+
# embedding = base64.b64decode(check)
171+
# embedding = np.frombuffer(embedding, dtype=np.float32)
172+
# if last is None:
173+
# last = embedding
174+
# continue
175+
# # convert to numpy float32 arrays
176+
# similarity = cosine_similarity(last, embedding)
177+
# print('similarity', similarity)
178+
# last = embedding
179+
156180
return ret

plugins/openvino/src/predict/text_recognize.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ async def detect_once(
6262
ratio_h = ratio_w = 1
6363
text_threshold = 0.7
6464
link_threshold = 0.9
65-
low_text = 0.4
65+
low_text = 0.5
6666
poly = False
6767

6868
boxes_list, polys_list, scores_list = [], [], []
@@ -138,7 +138,7 @@ async def run_detection_image(
138138
"className": "text",
139139
}
140140
futures.append(
141-
asyncio.ensure_future(self.setLabel(d, image, group["skew_angle"]))
141+
asyncio.ensure_future(self.setLabel(d, image, group["skew_angle"], group['deskew_height']))
142142
)
143143
detections.append(d)
144144

@@ -153,10 +153,10 @@ async def run_detection_image(
153153
return ret
154154

155155
async def setLabel(
156-
self, d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float
156+
self, d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float, deskew_height: float
157157
):
158158
try:
159-
image_tensor = await prepare_text_result(d, image, skew_angle)
159+
image_tensor = await prepare_text_result(d, image, skew_angle, deskew_height)
160160
preds = await self.predictTextModel(image_tensor)
161161
d["label"] = process_text_result(preds)
162162

plugins/openvino/src/predict/text_skew.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -61,28 +61,40 @@ def find_adjacent_groups(boxes: List[BoundingBox], scores: List[float]) -> List[
6161
if added_to_group:
6262
break
6363
if not added_to_group:
64-
groups.append({"boxes": [box], "scores": [scores[index]], "skew_angle": 0})
64+
groups.append({"boxes": [box], "scores": [scores[index]]})
6565

6666
# Calculate the skew angle of each group
6767
for group in groups:
6868
boxes = group["boxes"]
6969
group["union"] = union_boxes(boxes)
7070
if len(boxes) - 1:
71-
lm = (boxes[0][1] + boxes[0][3]) / 2
72-
rm = (boxes[-1][1] + boxes[-1][3]) / 2
73-
dx = (boxes[-1][0]) - (boxes[0][0] + boxes[0][2])
71+
lm = boxes[0][1] + boxes[0][3] / 2
72+
rm = boxes[-1][1] + boxes[-1][3] / 2
73+
dx = (boxes[-1][0]) - (boxes[0][0])
7474
minx = min([box[0] for box in boxes])
7575
maxx = max([box[0] + box[2] for box in boxes])
76+
77+
# denoise by filtering the box height
78+
minh = min([box[3] for box in boxes])
79+
median_height = sorted([box[3] for box in boxes])[len(boxes) // 2]
7680
maxh = max([box[3] for box in boxes])
77-
pad_height = maxh * 0.05
81+
filter_height = median_height
82+
pad_height = filter_height * 0.05
83+
7884
dx = maxx - minx
79-
group['skew_angle'] = math.atan2(rm - lm, dx) * 2
85+
group['skew_angle'] = math.atan((rm - lm) / dx)
86+
group['deskew_height'] = filter_height + pad_height * 2
8087
# pad this box by a few pixels
81-
group['union'] = (group['union'][0] - pad_height, group['union'][1] - pad_height, group['union'][2] + pad_height * 2, group['union'][3] + pad_height * 2)
88+
group['union'] = (
89+
group['union'][0] - pad_height,
90+
group['union'][1] - pad_height,
91+
group['union'][2] + pad_height * 2,
92+
group['union'][3] + pad_height * 2)
8293
# average the scores
8394
group['score'] = sum(group['scores']) / len(group['scores'])
8495
else:
8596
group['skew_angle'] = 0
97+
group['deskew_height'] = boxes[0][3]
8698
group['score'] = group['scores'][0]
8799

88100
return groups

0 commit comments

Comments
 (0)