Skip to content

Commit

Permalink
Merge pull request #96 from Marszzz1116/master
Browse files Browse the repository at this point in the history
1、修复 assert_ocr_not_exist 断言 Bug;2、扩展 assert_ocr_exist 与 assert_ocr_not_exist 断言;3、优化 OCR 识别范围筛选方法
  • Loading branch information
mikigo authored Aug 2, 2024
2 parents 9b96e97 + 6835de3 commit f35bb30
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 54 deletions.
136 changes: 121 additions & 15 deletions src/assert_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from src.ocr_utils import OCRUtils as OCR
from src.image_utils import ImageUtils
from src.mouse_key import MouseKey
from src.filectl import FileCtl
from src.dogtail_utils import DogtailUtils
from src.custom_exception import TemplateElementNotFound
Expand Down Expand Up @@ -398,12 +399,11 @@ def assert_ocr_exist(
timeout: [int, float] = None,
max_match_number: int = None,
mode: str = "all",
bbox: dict = None,
):
"""
断言文案存在
:param args:
目标字符,识别一个字符串或多个字符串,并返回其在图片中的坐标;
如果不传参,返回图片中识别到的所有字符串。
:param args: 目标字符,识别一个字符串或多个字符串。
:param picture_abspath: 要识别的图片路径,如果不传默认截取全屏识别。
:param similarity: 匹配度。
:param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。
Expand All @@ -413,10 +413,54 @@ def assert_ocr_exist(
:param timeout: 最大匹配超时,单位秒
:param max_match_number: 最大匹配次数
:param mode: "all" or "any",all 表示识别所有目标字符,any 表示识别任意一个目标字符,默认值为 all
:param bbox:
接收一个字典,包含一个区域,在区域内进行识别,用于干扰较大时提升OCR识别精准度
字典字段:
start_x: 开始 x 坐标(左上角)
start_y: 开始 y 坐标(左上角)
w: 宽度
h: 高度
end_x: 结束 x 坐标(右下角)
end_y: 结束 y 坐标(右下角)
注意 : end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组
示例:
{"start_x": 0, "start_y": 0, "w": 100, "h": 100}
{"start_x": 0, "start_y": 0, "end_x": 100, "end_y": 100}
"""

if len(args) == 0:
raise ValueError("缺少 ocr 断言关键字")

pic = None
if picture_abspath is not None:
pic = picture_abspath + ".png"

resolution = MouseKey.screen_size()
if bbox is not None:
start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None
start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None
w = bbox.get("w") if bbox.get("w") is not None else None
h = bbox.get("h") if bbox.get("h") is not None else None
end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None
end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None

if start_x is None or start_y is None:
raise ValueError("缺失 start_x 或 start_y 坐标")

wh_provided = w is not None and h is not None
end_xy_provided = end_x is not None and end_y is not None

if not (wh_provided ^ end_xy_provided):
raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组")

if end_xy_provided:
w = end_x - start_x
h = end_y - start_y
picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h)
pic = picture_abspath + ".png"

resolution = f"{start_x, start_y} -> {w, h}"

res = OCR.ocr(
*args,
picture_abspath=pic,
Expand All @@ -430,7 +474,10 @@ def assert_ocr_exist(
)
if res is False:
raise AssertionError(
(f"通过OCR未识别到:{args}", f"{pic if pic else GlobalConfig.SCREEN_CACHE}")
(
f"通过OCR在范围[{resolution}]未识别到:{args}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
if isinstance(res, tuple):
pass
Expand All @@ -440,14 +487,14 @@ def assert_ocr_exist(
res = filter(lambda x: x[1] is False, res.items())
raise AssertionError(
(
f"通过OCR未识别到{dict(res)}",
f"通过OCR在范围[{resolution}]未识别到{dict(res)}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
elif mode == "any" and len(res) == list(res.values()).count(False):
raise AssertionError(
(
f"通过OCR未识别到{args}中的任意一个",
f"通过OCR在范围[{resolution}]未识别到{args}中的任意一个",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
Expand All @@ -463,11 +510,67 @@ def assert_ocr_not_exist(
pause: [int, float] = None,
timeout: [int, float] = None,
max_match_number: int = None,
bbox: dict = None,
):
"""断言文案不存在"""
"""
断言文案不存在
:param args: 目标字符,识别一个字符串或多个字符串。
:param picture_abspath: 要识别的图片路径,如果不传默认截取全屏识别。
:param similarity: 匹配度。
:param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。
:param lang: `ch`, `en`, `fr`, `german`, `korean`, `japan`
:param network_retry: 连接服务器重试次数
:param pause: 重试间隔时间,单位秒
:param timeout: 最大匹配超时,单位秒
:param max_match_number: 最大匹配次数
:param bbox:
接收一个字典,包含一个区域,在区域内进行识别,用于干扰较大时提升OCR识别精准度
字典字段:
start_x: 开始 x 坐标(左上角)
start_y: 开始 y 坐标(左上角)
w: 宽度
h: 高度
end_x: 结束 x 坐标(右下角)
end_y: 结束 y 坐标(右下角)
注意 : end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组
示例:
{"start_x": 0, "start_y": 0, "w": 100, "h": 100}
{"start_x": 0, "start_y": 0, "end_x": 100, "end_y": 100}
"""

if len(args) == 0:
raise ValueError("缺少 ocr 断言关键字")

pic = None
if picture_abspath is not None:
pic = picture_abspath + ".png"

resolution = MouseKey.screen_size()
if bbox is not None:
start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None
start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None
w = bbox.get("w") if bbox.get("w") is not None else None
h = bbox.get("h") if bbox.get("h") is not None else None
end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None
end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None

if start_x is None or start_y is None:
raise ValueError("缺失 start_x 或 start_y 坐标")

wh_provided = w is not None and h is not None
end_xy_provided = end_x is not None and end_y is not None

if not (wh_provided ^ end_xy_provided):
raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组")

if end_xy_provided:
w = end_x - start_x
h = end_y - start_y
picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h)
pic = picture_abspath + ".png"

resolution = f"{start_x, start_y} -> {w, h}"

res = OCR.ocr(
*args,
picture_abspath=pic,
Expand All @@ -484,15 +587,18 @@ def assert_ocr_not_exist(
elif isinstance(res, tuple):
raise AssertionError(
(
f"通过ocr识别到不应存在的文案 {res}",
f"通过ocr在范围[{resolution}]识别到不应存在的文案 {res}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
elif isinstance(res, dict) and True in res.values():
res = filter(lambda x: x[1] is not False, res.items())
raise AssertionError(
(
f"通过OCR识别到不应存在的文案:{dict(res)}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
elif isinstance(res, dict):
if all(value is False for value in res.values()):
pass
else:
res = filter(lambda x: x[1] is not False, res.items())
raise AssertionError(
(
f"通过OCR在范围[{resolution}]识别到不应存在的文案:{dict(res)}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
)
91 changes: 52 additions & 39 deletions src/ocr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,17 @@ def ocr(cls, *args, **kwargs):

@classmethod
def ocr_remote(
cls,
target_strings: tuple = None,
picture_abspath: str = None,
similarity: [int, float] = 0.6,
return_default: bool = False,
return_first: bool = False,
lang: str = "ch",
network_retry: int = None,
pause: [int, float] = None,
timeout: [int, float] = None,
max_match_number: int = None,
cls,
target_strings: tuple = None,
picture_abspath: str = None,
similarity: [int, float] = 0.6,
return_default: bool = False,
return_first: bool = False,
lang: str = "ch",
network_retry: int = None,
pause: [int, float] = None,
timeout: [int, float] = None,
max_match_number: int = None,
):
servers = cls.ocr_servers
while servers:
Expand Down Expand Up @@ -96,20 +96,23 @@ def _check_xy(cls):
@classmethod
def click(cls):
from src.mouse_key import MouseKey

cls._check_xy()
MouseKey.click(cls.x, cls.y)
return cls

@classmethod
def right_click(cls):
from src.mouse_key import MouseKey

cls._check_xy()
MouseKey.right_click(cls.x, cls.y)
return cls

@classmethod
def double_click(cls):
from src.mouse_key import MouseKey

cls._check_xy()
MouseKey.double_click(cls.x, cls.y)
return cls
Expand All @@ -126,41 +129,51 @@ def all_result(cls):
@classmethod
def ocr_find_by_range(cls, text, x1=None, x2=None, y1=None, y2=None):
"""
OCR在界面中识别到多个关键词时,通过区域筛选出对应关键词并返回坐标
:param text: 页面查找关键词
:param x1: x坐标开始范围
:param x2: x坐标结束范围
:param y1: y坐标开始范围
:param y2: y坐标结束范围
OCR在当前界面中识别到多个关键词时,通过区域筛选出对应关键词并返回坐标
:param text: 页面范围内查找关键词,可自由使用以下参数划定查找区域
:param x1: x坐标开始范围,有效区域为大于 x1 区域
:param x2: x坐标结束范围,有效区域为小于 x2 区域
:param y1: y坐标开始范围,有效区域为大于 y1 区域
:param y2: y坐标结束范围,有效区域为小于 y2 区域
:return: 坐标元组 (x, y)
注意:需要特定区域内只有一组OCR关键词,若任有多组请增加精度,否则默认返回第一组符合条件的关键词坐标
注意:该方法设计是为了筛选出唯一坐标,所以需要特定区域内只有一组OCR关键词,若仍有多组数据会直接报错,请增加精度
以默认分辨率 1920*1080 为例,多种示例情况如下:
示例1(识别左半屏幕关键字):ocr_find_by_range(x1=960)
示例1(识别左半屏幕关键字):ocr_find_by_range("关键词", x2=960)
示例2(识别下半屏幕关键字):ocr_find_by_range("关键词", y1=540)
示例3(识别左半屏幕-上半屏关键字):ocr_find_by_range(x1=960, y1=540)
示例4(识别特定区域 :100*900-200*950 内关键字):ocr_find_by_range(x1=100, x2=200, y1=900, y2=950)
示例3(识别右下半屏幕关键字):ocr_find_by_range("关键词", x1=960, y1=540)
示例4(识别特定区域 :100*900-200*950 内关键字):ocr_find_by_range("关键词", x1=100, y1=900, x2=200, y2=950)
"""
defaults = {
'x1': 0,
'x2': 1920,
'y1': 0,
'y2': 1080
}

x1 = x1 if x1 is not None else defaults['x1']
x2 = x2 if x2 is not None else defaults['x2']
y1 = y1 if y1 is not None else defaults['y1']
y2 = y2 if y2 is not None else defaults['y2']
defaults = {"x1": 0, "x2": 1920, "y1": 0, "y2": 1080}

x1 = x1 if x1 is not None else defaults["x1"]
x2 = x2 if x2 is not None else defaults["x2"]
y1 = y1 if y1 is not None else defaults["y1"]
y2 = y2 if y2 is not None else defaults["y2"]

if x1 > x2 or y1 > y2:
raise ValueError("x1 > x2 or y1 > y2")

results = []
ocr_return = cls.ocr(text)
if isinstance(ocr_return, dict):
for key, value in ocr_return.items():
for _, value in ocr_return.items():
if x1 <= value[0] <= x2 and y1 <= value[1] <= y2:
return value
return ocr_return


if __name__ == '__main__':
OCRUtils.ocrx().click()
results.append(value)
if len(results) == 0:
raise ValueError(
f"范围内[{x1, y1} - {x2, y2}]未识别到关键词[{text}],请增加识别精度,识别结果为:{results}"
)
elif len(results) == 1:
return results[0]
else:
raise ValueError(
f"范围内[{x1, y1} - {x2, y2}]识别到多组关键词[{text}],请增加识别精度,识别结果为:{results}"
)
if x1 <= ocr_return[0] <= x2 and y1 <= ocr_return[1] <= y2:
return ocr_return
else:
raise ValueError(
f"范围内[{x1, y1} - {x2, y2}]识别不到关键词,请增加识别精度,识别结果为:{ocr_return}"
)

0 comments on commit f35bb30

Please sign in to comment.