Skip to content

Commit

Permalink
Merge pull request #159 from glitchassassin/develop
Browse files Browse the repository at this point in the history
v0.7.4
  • Loading branch information
glitchassassin authored Aug 6, 2020
2 parents 75e545d + 39f6aa0 commit 1ea404a
Show file tree
Hide file tree
Showing 19 changed files with 496 additions and 395 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

# Build files
/build
/dist
/Lackey.egg-info
/docs/build

Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ Then you can just import Lackey at the head of your Sikuli-script python file:

**WARNING** Be aware that this will create global methods that will *overwrite certain Python functions*, such as `type()`. For more information, see the **Sikuli Patching** section below.

#### Installing Tesseract OCR ####

OCR features are dependent upon a third-party utility, Tesseract OCR (v3.05+). Installation instructions for your platform [can be found here](https://github.com/tesseract-ocr/tesseract/wiki). On some platforms, you may need to manually add the Tesseract folder to your PATH.

### General ###

The Lackey library is divided up into classes for finding and interacting with particular regions of the screen. Patterns are provided as bitmap files (supported formats include `.bmp`, `.pbm`, `.ras`, `.jpg`, `.tiff`, and `.png`). These patterns are compared to a Region of the screen, and, if they exist, can target a mouse move/click action.
Expand Down Expand Up @@ -83,6 +87,8 @@ Don't forget to update the unit tests and verify that they still run.

This library is currently under development, and may have some bugs. Check the Issues list to find features/bugs you can help with!

As of version 0.7.4, Python 2 support is deprecated: it may still work, but maintaining it is no longer an active priority.

## Build Instructions ##

To build the wheel from source, `cd` to the project directory and run:
Expand Down
8 changes: 3 additions & 5 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,15 @@ environment:
secure: 1+JDFvadY94ojZGhbEeZ/G0of7zzFWwXaj4Mx0Th0Lo=
matrix:

# Python 2.7.12 is the latest version and is not pre-installed.

- PYTHON: "C:\\Python27"
- PYTHON: "C:\\Python27-x64"

- PYTHON: "C:\\Python36"
- PYTHON: "C:\\Python36-x64"

- PYTHON: "C:\\Python37"
- PYTHON: "C:\\Python37-x64"

- PYTHON: "C:\\Python38"
- PYTHON: "C:\\Python38-x64"

install:
# If there is a newer build queued for the same PR, cancel this one.
# The AppVeyor 'rollout builds' option is supposed to serve the same
Expand Down
6 changes: 5 additions & 1 deletion lackey/App.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ def getWindow(self):
Returns an empty string if no match could be found.
"""
if self.getPID() != -1:
if not self.hasWindow():
return ""
return PlatformManager.getWindowTitle(PlatformManager.getWindowByPID(self.getPID()))
else:
return ""
Expand All @@ -213,7 +215,7 @@ def waitForWindow(self, seconds=5):
timeout = time.time() + seconds
while True:
window_region = self.window()
if window_region is not None or time.time() < timeout:
if window_region is not None or time.time() > timeout:
break
time.sleep(0.5)
return window_region
Expand All @@ -224,6 +226,8 @@ def window(self, windowNum=0):
"""
if self._pid == -1:
return None
if not self.hasWindow():
return None
x,y,w,h = PlatformManager.getWindowRect(PlatformManager.getWindowByPID(self._pid, windowNum))
return Region(x,y,w,h).clipRegionToScreen()

Expand Down
124 changes: 124 additions & 0 deletions lackey/Ocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import pytesseract
import csv
import re

from .SettingsDebug import Debug

class OCR():
    """
    Text recognition helpers built on top of pytesseract (Tesseract OCR).

    The `find_*` methods locate text inside an image and report matches as a
    `(rect, confidence)` tuple, where `rect` is `(left, top, width, height)`.
    `text` arguments are treated as regular expressions (`re.search`).
    """
    def start(self):
        """
        Returns the object itself (a hack for Sikuli compatibility)
        """
        return self # Dummy method for Sikuli compatibility
    def image_to_text(self, image):
        """
        Returns the text found in the given image.
        """
        return pytesseract.image_to_string(image)
    def find_word(self, image, text, confidence=0.6):
        """
        Finds the first word in `image` that matches `text`.
        Currently ignores confidence

        Returns `((left, top, width, height), conf)` using the raw (string)
        field values from Tesseract's TSV output, or None if no word matches.
        """
        for rect in self._read_rows(image):
            # Short/malformed rows are padded with None by csv.DictReader;
            # skip them instead of passing None to re.search
            if rect["text"] is None:
                continue
            if re.search(text, rect["text"]):
                return (
                    (
                        rect["left"],
                        rect["top"],
                        rect["width"],
                        rect["height"]
                    ),
                    rect["conf"]
                )
        return None
    def find_line(self, image, text, confidence=0.6):
        """
        Finds the first line in `image` whose text matches `text`.
        Currently ignores confidence

        Returns `((x, y, w, h), conf)`, where the rect is the bounding box of
        every word on the matching line and `conf` is the lowest word
        confidence on that line, or None if no line matches.
        """
        for words in self._group_words_by_line(self._read_rows(image)):
            if self._check_if_line_matches(words, text, confidence):
                return (
                    self._bounding_box(words),
                    min(float(w["conf"]) for w in words)
                )
        return None
    def find_all_in_image(self, image, text, confidence=0.6):
        """
        Finds all blocks of text in `image` that match `text`.
        Currently ignores confidence

        Returns a list of `((x, y, w, h), confidence)` tuples. The second
        element is the `confidence` argument scaled to 0-100, not a value
        measured by Tesseract.
        """
        confidence = confidence*100 # Scaling for pytesseract
        rects = self._read_rows(image)
        line = []
        matches = []
        for rect in rects:
            if len(line) and (line[0]["page_num"], line[0]["block_num"], line[0]["par_num"], line[0]["line_num"]) == (rect["page_num"], rect["block_num"], rect["par_num"], rect["line_num"]):
                # This rect is on the same line
                line.append(rect)
            else:
                # First rect of a new line; discard the unmatched buffer
                line = [rect]

            if self._check_if_line_matches(line, text, confidence):
                matches.append(self._reduce_line_matches(line, text, confidence))
                # Start a fresh buffer so the same words are not matched twice
                line = []
        return matches
    def find_in_image(self, image, text, confidence=0.6):
        """
        Finds first match of `text` in `image` (may be a regex).
        Currently ignores confidence
        """
        matches = self.find_all_in_image(image, text, confidence)
        if matches:
            return matches[0]
        return None
    def _read_rows(self, image):
        """
        Runs Tesseract on `image` and returns its TSV output as a list of
        dicts keyed by the TSV header (all values are strings, or None for
        fields missing from a short row).
        """
        data = pytesseract.image_to_data(image)
        reader = csv.DictReader(data.split("\n"), delimiter="\t", quoting=csv.QUOTE_NONE)
        return list(reader)
    def _group_words_by_line(self, rows):
        """
        Groups consecutive rows that carry text and share the same
        (page, block, paragraph, line) numbers. Rows without text are dropped.
        Returns a list of lists of rows, in the order they appear in `rows`.
        """
        groups = []
        current_key = None
        for row in rows:
            word = row["text"]
            if word is None or not word.strip():
                continue
            key = (row["page_num"], row["block_num"], row["par_num"], row["line_num"])
            if not groups or key != current_key:
                groups.append([])
                current_key = key
            groups[-1].append(row)
        return groups
    def _bounding_box(self, words):
        """
        Returns the `(x, y, w, h)` rect enclosing every element of `words`.
        `words` must not be empty.
        """
        xs = []
        ys = []
        for e in words:
            xs.append(int(e["left"]))
            ys.append(int(e["top"]))
            xs.append(int(e["left"])+int(e["width"]))
            ys.append(int(e["top"])+int(e["height"]))
        return (
            min(xs), # X
            min(ys), # Y
            max(xs) - min(xs), # W
            max(ys) - min(ys), # H
        )
    def _check_if_line_matches(self, line, text, confidence):
        """
        Joins the `text` property of each element in `line` with spaces and
        returns True if the `text` regex is found in the result.
        `confidence` is currently unused.
        """
        return re.search(text, " ".join(e["text"] for e in line if e["text"] is not None)) is not None
    def _reduce_line_matches(self, line, text, confidence):
        """
        Drops as many leading elements from `line` as possible while the
        remainder still matches `text`, then returns `(bbox, confidence)` for
        the elements that are left. `line` itself is not modified.
        """
        # Advance past every leading element whose removal keeps the match.
        # Bounding the scan by len(line) keeps a pattern that matches the
        # empty string from running off the end of the list.
        start = 0
        while start < len(line) and self._check_if_line_matches(line[start:], text, confidence):
            start += 1
        # The last suffix that still matched began one element earlier
        if start > 0:
            start -= 1
        return (self._bounding_box(line[start:]), confidence)

# Shared module-level instance
TextOCR = OCR()
24 changes: 18 additions & 6 deletions lackey/PlatformManagerDarwin.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,17 @@ def getScreenDetails(self):
)
}
screens.append(screen)

# Convert y-coordinates
y1 = screens[0]["rect"][1] # min([s["rect"][1] for s in screens])
y2 = screens[0]["rect"][1]+screens[0]["rect"][3] # max([s["rect"][1]+s["rect"][3] for s in screens])
for screen in screens:
screen["rect"] = (
screen["rect"][0],
(y2 - screen["rect"][3]) - screen["rect"][1],
screen["rect"][2],
screen["rect"][3]
)
return screens
def isPointVisible(self, x, y):
""" Checks if a point is visible on any monitor. """
Expand All @@ -287,15 +298,15 @@ def isPointVisible(self, x, y):
def osCopy(self):
    """ Triggers the OS "copy" keyboard shortcut (Cmd+C on macOS) """
    k = Keyboard()
    # Hold only Command: also holding Control sends Ctrl+Cmd+C, which is not "copy"
    k.keyDown("{CMD}")
    k.type("c")
    k.keyUp("{CMD}")
def osPaste(self):
    """ Triggers the OS "paste" keyboard shortcut (Cmd+V on macOS) """
    k = Keyboard()
    # Hold only Command: also holding Control sends Ctrl+Cmd+V, which is not "paste"
    k.keyDown("{CMD}")
    k.type("v")
    k.keyUp("{CMD}")

## Window functions

Expand All @@ -313,12 +324,13 @@ def getWindowByPID(self, pid, order=0):
""" Returns a handle for the first window that matches the provided PID """
for w in self._get_window_list():
if "kCGWindowOwnerPID" in w and w["kCGWindowOwnerPID"] == pid:
print(self.getWindowRect(w["kCGWindowNumber"]))
# Matches - make sure we get it in the correct order
if order == 0:
return w["kCGWindowNumber"]
else:
order -= 1
raise OSError("Could not find window for PID {} at index {}".format(pid, order))
return None
def getWindowRect(self, hwnd):
""" Returns a rect (x,y,w,h) for the specified window's area """
for w in self._get_window_list():
Expand All @@ -328,7 +340,7 @@ def getWindowRect(self, hwnd):
width = w["kCGWindowBounds"]["Width"]
height = w["kCGWindowBounds"]["Height"]
return (x, y, width, height)
raise OSError("Unrecognized window number {}".format(hwnd))
return None
def focusWindow(self, hwnd):
""" Brings specified window to the front """
Debug.log(3, "Focusing window: " + str(hwnd))
Expand Down
Loading

0 comments on commit 1ea404a

Please sign in to comment.