Building out OCR functions, extending generic functions

glitchassassin · Nov 12, 2018 · 07b1dcd · 07b1dcd
1 parent ae4c520
commit 07b1dcd
Show file tree

Hide file tree

Showing 5 changed files with 300 additions and 64 deletions.
diff --git a/lackey/Ocr.py b/lackey/Ocr.py
@@ -3,24 +3,39 @@
 import csv
 import re
 
-#from .SettingsDebug import Debug
+from .SettingsDebug import Debug
 
-class TextOCR():
+class OCR():
+    def start(self):
+        """
+        Returns the object itself (a hack for Sikuli compatibility)
+        """
+        return self # Dummy method for Sikuli compatibility
     def image_to_text(self, image):
+        """
+        Returns the text found in the given image.
+        """
         return pytesseract.image_to_string(image)
 
-    def find_all_in_image(self, image, text, confidence=0.7):
+    def find_all_in_image(self, image, text, confidence=0.6):
+        """
+        Finds all blocks of text in `image` that match `text`.
+        Currently ignores confidence
+        """
         confidence = confidence*100 # Scaling for pytesseract
         data = pytesseract.image_to_data(image)
-        reader = csv.DictReader(data.split("\n"), delimiter="\t")
+        print(data)
+        reader = csv.DictReader(data.split("\n"), delimiter="\t", quoting=csv.QUOTE_NONE)
         rects = [r for r in reader]
+        # Debug.info("Rects: " + repr(rects))
         line = []
         matches = []
         for rect in rects:
             if len(line) and (line[0]["page_num"], line[0]["block_num"], line[0]["par_num"], line[0]["line_num"]) == (rect["page_num"], rect["block_num"], rect["par_num"], rect["line_num"]):
                 # This rect is on the same line
                 line.append(rect)
             else:
+                Debug.info("Line: " + " ".join(e["text"] for e in line if e["text"] is not None)) # int(e["conf"]) > confidence and 
                 line = [rect]
 
             if self._check_if_line_matches(line, text, confidence):
@@ -31,14 +46,19 @@ def find_all_in_image(self, image, text, confidence=0.7):
             # If so, and there are multiple elements, try removing the first one and see if it still matches.
             # If not, add next element, and check if it matches
         return matches
-    def find_in_image(self, image, text, confidence=0.7):
+    def find_in_image(self, image, text, confidence=0.6):
+        """
+        Finds first match of `text` in `image` (may be a regex).
+        Currently ignores confidence
+        """
         matches = self.find_all_in_image(image, text, confidence)
+        Debug.info("Matches: " + repr(matches))
         if matches:
             return matches[0]
         return None
     def _check_if_line_matches(self, line, text, confidence):
         # Join the `text` property from each element in `line` and compare it with the `text` regex
-        return re.search(text, " ".join(e["text"] for e in line if int(e["conf"]) > confidence and e["text"] is not None)) is not None
+        return re.search(text, " ".join(e["text"] for e in line if e["text"] is not None)) is not None # int(e["conf"]) > confidence and
     def _reduce_line_matches(self, line, text, confidence):
         # Remove the first element from line and see if it still matches
         while self._check_if_line_matches(line, text, confidence):
@@ -57,8 +77,10 @@ def _reduce_line_matches(self, line, text, confidence):
             max(corner[0] for corner in corners) - min(corner[0] for corner in corners), # W
             max(corner[1] for corner in corners) - min(corner[1] for corner in corners), # H
         )
-        return bbox
+        return (bbox, confidence)
+
+TextOCR = OCR()
 
 if __name__ == "__main__":
-    ocr = TextOCR()
+    ocr = TextOCR.start()
     pprint(ocr.find_all_in_image("/Users/jwinsley/Downloads/tesseract/homemeds.png", "Ibuprofen"))