Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1、修复 assert_ocr_not_exist 断言 Bug;2、扩展 assert_ocr_exist 与 assert_ocr_not_exist 断言;3、优化 OCR 识别范围筛选方法 #96

Merged
merged 1 commit into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 121 additions & 15 deletions src/assert_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from src.ocr_utils import OCRUtils as OCR
from src.image_utils import ImageUtils
from src.mouse_key import MouseKey
from src.filectl import FileCtl
from src.dogtail_utils import DogtailUtils
from src.custom_exception import TemplateElementNotFound
Expand Down Expand Up @@ -398,12 +399,11 @@ def assert_ocr_exist(
timeout: [int, float] = None,
max_match_number: int = None,
mode: str = "all",
bbox: dict = None,
):
"""
断言文案存在
:param args:
目标字符,识别一个字符串或多个字符串,并返回其在图片中的坐标;
如果不传参,返回图片中识别到的所有字符串。
:param args: 目标字符,识别一个字符串或多个字符串。
:param picture_abspath: 要识别的图片路径,如果不传默认截取全屏识别。
:param similarity: 匹配度。
:param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。
Expand All @@ -413,10 +413,54 @@ def assert_ocr_exist(
:param timeout: 最大匹配超时,单位秒
:param max_match_number: 最大匹配次数
:param mode: "all" or "any",all 表示识别所有目标字符,any 表示识别任意一个目标字符,默认值为 all
:param bbox:
接收一个字典,包含一个区域,在区域内进行识别,用于干扰较大时提升OCR识别精准度
字典字段:
start_x: 开始 x 坐标(左上角)
start_y: 开始 y 坐标(左上角)
w: 宽度
h: 高度
end_x: 结束 x 坐标(右下角)
end_y: 结束 y 坐标(右下角)
注意 : end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组
示例:
{"start_x": 0, "start_y": 0, "w": 100, "h": 100}
{"start_x": 0, "start_y": 0, "end_x": 100, "end_y": 100}
"""

if len(args) == 0:
raise ValueError("缺少 ocr 断言关键字")

pic = None
if picture_abspath is not None:
pic = picture_abspath + ".png"

resolution = MouseKey.screen_size()
if bbox is not None:
start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None
start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None
w = bbox.get("w") if bbox.get("w") is not None else None
h = bbox.get("h") if bbox.get("h") is not None else None
end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None
end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None

if start_x is None or start_y is None:
raise ValueError("缺失 start_x 或 start_y 坐标")

wh_provided = w is not None and h is not None
end_xy_provided = end_x is not None and end_y is not None

if not (wh_provided ^ end_xy_provided):
raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组")

if end_xy_provided:
w = end_x - start_x
h = end_y - start_y
picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h)
pic = picture_abspath + ".png"

resolution = f"{start_x, start_y} -> {w, h}"

res = OCR.ocr(
*args,
picture_abspath=pic,
Expand All @@ -430,7 +474,10 @@ def assert_ocr_exist(
)
if res is False:
raise AssertionError(
(f"通过OCR未识别到:{args}", f"{pic if pic else GlobalConfig.SCREEN_CACHE}")
(
f"通过OCR在范围[{resolution}]未识别到:{args}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
if isinstance(res, tuple):
pass
Expand All @@ -440,14 +487,14 @@ def assert_ocr_exist(
res = filter(lambda x: x[1] is False, res.items())
raise AssertionError(
(
f"通过OCR未识别到:{dict(res)}",
f"通过OCR在范围[{resolution}]未识别到:{dict(res)}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
elif mode == "any" and len(res) == list(res.values()).count(False):
raise AssertionError(
(
f"通过OCR未识别到:{args}中的任意一个",
f"通过OCR在范围[{resolution}]未识别到:{args}中的任意一个",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
Expand All @@ -463,11 +510,67 @@ def assert_ocr_not_exist(
pause: [int, float] = None,
timeout: [int, float] = None,
max_match_number: int = None,
bbox: dict = None,
):
"""断言文案不存在"""
"""
断言文案不存在
:param args: 目标字符,识别一个字符串或多个字符串。
:param picture_abspath: 要识别的图片路径,如果不传默认截取全屏识别。
:param similarity: 匹配度。
:param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。
:param lang: `ch`, `en`, `fr`, `german`, `korean`, `japan`
:param network_retry: 连接服务器重试次数
:param pause: 重试间隔时间,单位秒
:param timeout: 最大匹配超时,单位秒
:param max_match_number: 最大匹配次数
:param bbox:
接收一个字典,包含一个区域,在区域内进行识别,用于干扰较大时提升OCR识别精准度
字典字段:
start_x: 开始 x 坐标(左上角)
start_y: 开始 y 坐标(左上角)
w: 宽度
h: 高度
end_x: 结束 x 坐标(右下角)
end_y: 结束 y 坐标(右下角)
注意 : end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组
示例:
{"start_x": 0, "start_y": 0, "w": 100, "h": 100}
{"start_x": 0, "start_y": 0, "end_x": 100, "end_y": 100}
"""

if len(args) == 0:
raise ValueError("缺少 ocr 断言关键字")

pic = None
if picture_abspath is not None:
pic = picture_abspath + ".png"

resolution = MouseKey.screen_size()
if bbox is not None:
start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None
start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None
w = bbox.get("w") if bbox.get("w") is not None else None
h = bbox.get("h") if bbox.get("h") is not None else None
end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None
end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None

if start_x is None or start_y is None:
raise ValueError("缺失 start_x 或 start_y 坐标")

wh_provided = w is not None and h is not None
end_xy_provided = end_x is not None and end_y is not None

if not (wh_provided ^ end_xy_provided):
raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组")

if end_xy_provided:
w = end_x - start_x
h = end_y - start_y
picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h)
pic = picture_abspath + ".png"

resolution = f"{start_x, start_y} -> {w, h}"

res = OCR.ocr(
*args,
picture_abspath=pic,
Expand All @@ -484,15 +587,18 @@ def assert_ocr_not_exist(
elif isinstance(res, tuple):
raise AssertionError(
(
f"通过ocr识别到不应存在的文案 {res}",
f"通过ocr在范围[{resolution}]识别到不应存在的文案 {res}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
elif isinstance(res, dict) and True in res.values():
res = filter(lambda x: x[1] is not False, res.items())
raise AssertionError(
(
f"通过OCR识别到不应存在的文案:{dict(res)}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
elif isinstance(res, dict):
if all(value is False for value in res.values()):
pass
else:
res = filter(lambda x: x[1] is not False, res.items())
raise AssertionError(
(
f"通过OCR在范围[{resolution}]识别到不应存在的文案:{dict(res)}",
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
)
)
)
91 changes: 52 additions & 39 deletions src/ocr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,17 @@ def ocr(cls, *args, **kwargs):

@classmethod
def ocr_remote(
cls,
target_strings: tuple = None,
picture_abspath: str = None,
similarity: [int, float] = 0.6,
return_default: bool = False,
return_first: bool = False,
lang: str = "ch",
network_retry: int = None,
pause: [int, float] = None,
timeout: [int, float] = None,
max_match_number: int = None,
cls,
target_strings: tuple = None,
picture_abspath: str = None,
similarity: [int, float] = 0.6,
return_default: bool = False,
return_first: bool = False,
lang: str = "ch",
network_retry: int = None,
pause: [int, float] = None,
timeout: [int, float] = None,
max_match_number: int = None,
):
servers = cls.ocr_servers
while servers:
Expand Down Expand Up @@ -96,20 +96,23 @@ def _check_xy(cls):
@classmethod
def click(cls):
    """Left-click at the coordinates currently stored on the class.

    Runs ``cls._check_xy()`` first (presumably a guard that the
    coordinates are usable -- its body is not visible here), then
    forwards ``cls.x`` and ``cls.y`` to ``MouseKey.click``.

    :return: the class itself, so further calls can be chained.
    """
    # Function-scope import, kept exactly where the original code has it.
    from src.mouse_key import MouseKey

    cls._check_xy()
    point = (cls.x, cls.y)
    MouseKey.click(*point)
    return cls

@classmethod
def right_click(cls):
    """Right-click at the coordinates currently stored on the class.

    Runs ``cls._check_xy()`` first (presumably a guard that the
    coordinates are usable -- its body is not visible here), then
    forwards ``cls.x`` and ``cls.y`` to ``MouseKey.right_click``.

    :return: the class itself, so further calls can be chained.
    """
    # Function-scope import, kept exactly where the original code has it.
    from src.mouse_key import MouseKey

    cls._check_xy()
    point = (cls.x, cls.y)
    MouseKey.right_click(*point)
    return cls

@classmethod
def double_click(cls):
    """Double-click at the coordinates currently stored on the class.

    Runs ``cls._check_xy()`` first (presumably a guard that the
    coordinates are usable -- its body is not visible here), then
    forwards ``cls.x`` and ``cls.y`` to ``MouseKey.double_click``.

    :return: the class itself, so further calls can be chained.
    """
    # Function-scope import, kept exactly where the original code has it.
    from src.mouse_key import MouseKey

    cls._check_xy()
    point = (cls.x, cls.y)
    MouseKey.double_click(*point)
    return cls
Expand All @@ -126,41 +129,51 @@ def all_result(cls):
@classmethod
def ocr_find_by_range(cls, text, x1=None, x2=None, y1=None, y2=None):
"""
OCR在界面中识别到多个关键词时,通过区域筛选出对应关键词并返回坐标
:param text: 页面查找关键词
:param x1: x坐标开始范围
:param x2: x坐标结束范围
:param y1: y坐标开始范围
:param y2: y坐标结束范围
OCR在当前界面中识别到多个关键词时,通过区域筛选出对应关键词并返回坐标
:param text: 页面范围内查找关键词,可自由使用以下参数划定查找区域
:param x1: x坐标开始范围,有效区域为大于 x1 区域
:param x2: x坐标结束范围,有效区域为小于 x2 区域
:param y1: y坐标开始范围,有效区域为大于 y1 区域
:param y2: y坐标结束范围,有效区域为小于 y2 区域
:return: 坐标元组 (x, y)

注意:需要特定区域内只有一组OCR关键词,若任有多组请增加精度,否则默认返回第一组符合条件的关键词坐标
注意:该方法设计是为了筛选出唯一坐标,所以需要特定区域内只有一组OCR关键词,若仍有多组数据会直接报错,请增加精度

以默认分辨率 1920*1080 为例,多种示例情况如下:
示例1(识别左半屏幕关键字):ocr_find_by_range(x1=960)
示例1(识别左半屏幕关键字):ocr_find_by_range(x2=960)
示例2(识别下半屏幕关键字):ocr_find_by_range(y1=540)
示例3(识别左半屏幕-上半屏关键字):ocr_find_by_range(x1=960, y1=540)
示例4(识别特定区域 :100*900-200*950 内关键字):ocr_find_by_range(x1=100, x2=200, y1=900, y2=950)
示例3(识别右下半屏幕关键字):ocr_find_by_range(x1=960, y1=540)
示例4(识别特定区域 :100*900-200*950 内关键字):ocr_find_by_range(x1=100, y1=900, x2=200, y2=950)
"""
defaults = {
'x1': 0,
'x2': 1920,
'y1': 0,
'y2': 1080
}

x1 = x1 if x1 is not None else defaults['x1']
x2 = x2 if x2 is not None else defaults['x2']
y1 = y1 if y1 is not None else defaults['y1']
y2 = y2 if y2 is not None else defaults['y2']
defaults = {"x1": 0, "x2": 1920, "y1": 0, "y2": 1080}

x1 = x1 if x1 is not None else defaults["x1"]
x2 = x2 if x2 is not None else defaults["x2"]
y1 = y1 if y1 is not None else defaults["y1"]
y2 = y2 if y2 is not None else defaults["y2"]

if x1 > x2 or y1 > y2:
raise ValueError("x1 > x2 or y1 > y2")

results = []
ocr_return = cls.ocr(text)
if isinstance(ocr_return, dict):
for key, value in ocr_return.items():
for _, value in ocr_return.items():
if x1 <= value[0] <= x2 and y1 <= value[1] <= y2:
return value
return ocr_return


if __name__ == '__main__':
OCRUtils.ocrx().click()
results.append(value)
if len(results) == 0:
raise ValueError(
f"范围内[{x1, y1} - {x2, y2}]未识别到关键词[{text}],请增加识别精度,识别结果为:{results}"
)
elif len(results) == 1:
return results[0]
else:
raise ValueError(
f"范围内[{x1, y1} - {x2, y2}]识别到多组关键词[{text}],请增加识别精度,识别结果为:{results}"
)
if x1 <= ocr_return[0] <= x2 and y1 <= ocr_return[1] <= y2:
return ocr_return
else:
raise ValueError(
f"范围内[{x1, y1} - {x2, y2}]识别不到关键词,请增加识别精度,识别结果为:{ocr_return}"
)
Loading