Merge pull request #159 from glitchassassin/develop

v0.7.4
glitchassassin · Aug 6, 2020 · 1ea404a · 1ea404a
2 parents 75e545d + 39f6aa0
commit 1ea404a
Show file tree

Hide file tree

Showing 19 changed files with 496 additions and 395 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,7 @@
 
 # Build files
 /build
+/dist
 /Lackey.egg-info
 /docs/build
 

diff --git a/README.md b/README.md
@@ -29,6 +29,10 @@ Then you can just import Lackey at the head of your Sikuli-script python file:
 
 **WARNING** Be aware that this will create global methods that will *overwrite certain Python functions*, such as `type()`. For more information, see the **Sikuli Patching** section below.
 
+#### Installing Tesseract OCR ####
+
+OCR features depend on a third-party utility, Tesseract OCR (v3.05+). Installation instructions for your platform [are available in the Tesseract wiki](https://github.com/tesseract-ocr/tesseract/wiki). On some platforms, you may need to manually add the Tesseract folder to your PATH.
+
 ### General ###
 
 The Lackey library is divided up into classes for finding and interacting with particular regions of the screen. Patterns are provided as bitmap files (supported formats include `.bmp`, `.pbm`, `.ras`, `.jpg`, `.tiff`, and `.png`). These patterns are compared to a Region of the screen, and, if they exist, can target a mouse move/click action.
@@ -83,6 +87,8 @@ Don't forget to update the unit tests and verify that they still run.
 
 This library is currently under development, and may have some bugs. Check the Issues list to find features/bugs you can help with!
 
+As of version 0.7.4, Python 2 support is deprecated and is no longer an active priority.
+
 ## Build Instructions ##
 
 To build the wheel from source, `cd` to the project directory and run:

diff --git a/appveyor.yml b/appveyor.yml
@@ -14,17 +14,15 @@ environment:
     secure: 1+JDFvadY94ojZGhbEeZ/G0of7zzFWwXaj4Mx0Th0Lo=
   matrix:
 
-    # Python 2.7.12 is the latest version and is not pre-installed.
-
-    - PYTHON: "C:\\Python27"
-    - PYTHON: "C:\\Python27-x64"
-
     - PYTHON: "C:\\Python36"
     - PYTHON: "C:\\Python36-x64"
 
     - PYTHON: "C:\\Python37"
     - PYTHON: "C:\\Python37-x64"
 
+    - PYTHON: "C:\\Python38"
+    - PYTHON: "C:\\Python38-x64"
+
 install:
   # If there is a newer build queued for the same PR, cancel this one.
   # The AppVeyor 'rollout builds' option is supposed to serve the same

diff --git a/lackey/App.py b/lackey/App.py
@@ -190,6 +190,8 @@ def getWindow(self):
         Returns an empty string if no match could be found.
         """
         if self.getPID() != -1:
+            if not self.hasWindow():
+                return ""
             return PlatformManager.getWindowTitle(PlatformManager.getWindowByPID(self.getPID()))
         else:
             return ""
@@ -213,7 +215,7 @@ def waitForWindow(self, seconds=5):
         timeout = time.time() + seconds
         while True:
             window_region = self.window()
-            if window_region is not None or time.time() < timeout:
+            if window_region is not None or time.time() > timeout:
                 break
             time.sleep(0.5)
         return window_region
@@ -224,6 +226,8 @@ def window(self, windowNum=0):
         """
         if self._pid == -1:
             return None
+        if not self.hasWindow():
+            return None
         x,y,w,h = PlatformManager.getWindowRect(PlatformManager.getWindowByPID(self._pid, windowNum))
         return Region(x,y,w,h).clipRegionToScreen()
 

diff --git a/lackey/Ocr.py b/lackey/Ocr.py
@@ -0,0 +1,124 @@
+import pytesseract
+import csv
+import re
+
+from .SettingsDebug import Debug
+
+class OCR():
+    """
+    Thin wrapper around pytesseract for reading and locating text in images.
+
+    The `find_*` methods parse the tab-separated output of
+    `pytesseract.image_to_data` into one dict per row (keyed by the TSV
+    header: `page_num`, `block_num`, `par_num`, `line_num`, `left`, `top`,
+    `width`, `height`, `conf`, `text`) and match the `text` regex against it.
+    """
+
+    def start(self):
+        """
+        Returns the object itself (a hack for Sikuli compatibility)
+        """
+        return self
+
+    def image_to_text(self, image):
+        """
+        Returns the text found in the given image.
+        """
+        return pytesseract.image_to_string(image)
+
+    def find_word(self, image, text, confidence=0.6):
+        """
+        Finds the first word in `image` that matches `text` (may be a regex).
+
+        Returns `((left, top, width, height), conf)` with the values exactly
+        as they appear in Tesseract's TSV output, or None if nothing matches.
+        Currently ignores confidence
+        """
+        for rect in self._read_rows(image):
+            word = rect["text"]
+            # Rows without recognized text are not words; `text` can also be
+            # None when a row is shorter than the header.
+            if not word:
+                continue
+            if re.search(text, word):
+                return (
+                    (
+                        rect["left"],
+                        rect["top"],
+                        rect["width"],
+                        rect["height"]
+                    ),
+                    rect["conf"]
+                )
+        return None
+
+    def find_line(self, image, text, confidence=0.6):
+        """
+        Finds the first line in `image` whose text matches `text` (may be a regex).
+
+        Returns `((x, y, w, h), conf)`, where the box encloses every word of
+        the matching line and `conf` is the lowest confidence reported for
+        any of those words, or None if no line matches.
+        Currently ignores confidence
+        """
+        # Group rows that carry text into lines, preserving reading order.
+        lines = []
+        for rect in self._read_rows(image):
+            if not rect["text"]:
+                continue
+            key = self._line_key(rect)
+            if lines and lines[-1][0] == key:
+                lines[-1][1].append(rect)
+            else:
+                lines.append((key, [rect]))
+        for _, words in lines:
+            if re.search(text, " ".join(e["text"] for e in words)):
+                return (self._bounding_box(words), self._lowest_confidence(words))
+        return None
+
+    def find_all_in_image(self, image, text, confidence=0.6):
+        """
+        Finds all blocks of text in `image` that match `text` (may be a regex).
+
+        Rows are accumulated line by line; as soon as the accumulated words
+        match, the match is trimmed to the shortest run of trailing words that
+        still matches and accumulation starts over.
+
+        Returns a list of `((x, y, w, h), confidence)` tuples.
+        Currently ignores confidence
+        """
+        confidence = confidence*100 # Scaling for pytesseract
+        line = []
+        matches = []
+        for rect in self._read_rows(image):
+            if line and self._line_key(line[0]) == self._line_key(rect):
+                # This rect is on the same line
+                line.append(rect)
+            else:
+                line = [rect]
+
+            if self._check_if_line_matches(line, text, confidence):
+                matches.append(self._reduce_line_matches(line, text, confidence))
+                # Start over so later matches do not reuse these words
+                line = []
+        return matches
+
+    def find_in_image(self, image, text, confidence=0.6):
+        """
+        Finds first match of `text` in `image` (may be a regex).
+        Currently ignores confidence
+        """
+        matches = self.find_all_in_image(image, text, confidence)
+        if matches:
+            return matches[0]
+        return None
+
+    def _read_rows(self, image):
+        """
+        Runs `pytesseract.image_to_data` on `image` and returns one dict per TSV row.
+        """
+        data = pytesseract.image_to_data(image)
+        # QUOTE_NONE keeps quote characters in recognized text literal
+        reader = csv.DictReader(data.split("\n"), delimiter="\t", quoting=csv.QUOTE_NONE)
+        return list(reader)
+
+    def _line_key(self, rect):
+        """
+        Returns the tuple identifying which line of text `rect` belongs to.
+        """
+        return (rect["page_num"], rect["block_num"], rect["par_num"], rect["line_num"])
+
+    def _check_if_line_matches(self, line, text, confidence):
+        """
+        Joins the `text` of each element in `line` with spaces and reports
+        whether the `text` regex matches the result. `confidence` is unused.
+        """
+        joined = " ".join(e["text"] for e in line if e["text"] is not None)
+        return re.search(text, joined) is not None
+
+    def _reduce_line_matches(self, line, text, confidence):
+        """
+        Drops leading elements of `line` for as long as the remainder still
+        matches `text`, then returns `(bbox, confidence)` for what is left.
+
+        `bbox` is `(x, y, w, h)`; `confidence` is passed through unchanged.
+        `line` itself is not modified. Raises ValueError if `line` is empty.
+        """
+        start = 0
+        # Always keep at least one element, even if the regex matches an empty string
+        while start + 1 < len(line) and self._check_if_line_matches(line[start + 1:], text, confidence):
+            start += 1
+        return (self._bounding_box(line[start:]), confidence)
+
+    def _bounding_box(self, line):
+        """
+        Returns the `(x, y, w, h)` box enclosing every element of `line`.
+        """
+        left = min(int(e["left"]) for e in line)
+        top = min(int(e["top"]) for e in line)
+        right = max(int(e["left"]) + int(e["width"]) for e in line)
+        bottom = max(int(e["top"]) + int(e["height"]) for e in line)
+        return (left, top, right - left, bottom - top)
+
+    def _lowest_confidence(self, line):
+        """
+        Returns the smallest `conf` value in `line` as a float, or None if
+        no element carries one.
+        """
+        confs = [float(e["conf"]) for e in line if e["conf"] is not None]
+        return min(confs) if confs else None
+
+# Module-level OCR instance, created once when this module is imported
+TextOCR = OCR()
diff --git a/lackey/PlatformManagerDarwin.py b/lackey/PlatformManagerDarwin.py
@@ -273,6 +273,17 @@ def getScreenDetails(self):
                 )
             }
             screens.append(screen)
+
+        # Convert y-coordinates
+        y1 = screens[0]["rect"][1] # min([s["rect"][1] for s in screens])
+        y2 = screens[0]["rect"][1]+screens[0]["rect"][3] # max([s["rect"][1]+s["rect"][3] for s in screens])
+        for screen in screens:
+            screen["rect"] = (
+                screen["rect"][0],
+                (y2 - screen["rect"][3]) - screen["rect"][1],
+                screen["rect"][2],
+                screen["rect"][3]
+            )
         return screens
     def isPointVisible(self, x, y):
         """ Checks if a point is visible on any monitor. """
@@ -287,15 +298,15 @@ def isPointVisible(self, x, y):
     def osCopy(self):
         """ Triggers the OS "copy" keyboard shortcut """
         k = Keyboard()
-        k.keyDown("{CTRL}")
+        k.keyDown("{CMD}")
         k.type("c")
-        k.keyUp("{CTRL}")
+        k.keyUp("{CMD}")
     def osPaste(self):
         """ Triggers the OS "paste" keyboard shortcut """
         k = Keyboard()
-        k.keyDown("{CTRL}")
+        k.keyDown("{CMD}")
         k.type("v")
-        k.keyUp("{CTRL}")
+        k.keyUp("{CMD}")
 
     ## Window functions
 
@@ -313,12 +324,13 @@ def getWindowByPID(self, pid, order=0):
         """ Returns a handle for the first window that matches the provided PID """
         for w in self._get_window_list():
             if "kCGWindowOwnerPID" in w and w["kCGWindowOwnerPID"] == pid:
+                print(self.getWindowRect(w["kCGWindowNumber"]))
                 # Matches - make sure we get it in the correct order
                 if order == 0:
                     return w["kCGWindowNumber"]
                 else:
                     order -= 1
-        raise OSError("Could not find window for PID {} at index {}".format(pid, order))
+        return None
     def getWindowRect(self, hwnd):
         """ Returns a rect (x,y,w,h) for the specified window's area """
         for w in self._get_window_list():
@@ -328,7 +340,7 @@ def getWindowRect(self, hwnd):
                 width = w["kCGWindowBounds"]["Width"]
                 height = w["kCGWindowBounds"]["Height"]
                 return (x, y, width, height)
-        raise OSError("Unrecognized window number {}".format(hwnd))
+        return None
     def focusWindow(self, hwnd):
         """ Brings specified window to the front """
         Debug.log(3, "Focusing window: " + str(hwnd))
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,6 +7,7 @@ @@
     # Build files
     /build
+    /dist
     /Lackey.egg-info
     /docs/build
@@ Expand Down @@