diff --git a/src/assert_common.py b/src/assert_common.py index f0d13635..cc3690f2 100644 --- a/src/assert_common.py +++ b/src/assert_common.py @@ -13,6 +13,7 @@ from src.ocr_utils import OCRUtils as OCR from src.image_utils import ImageUtils +from src.mouse_key import MouseKey from src.filectl import FileCtl from src.dogtail_utils import DogtailUtils from src.custom_exception import TemplateElementNotFound @@ -398,12 +399,11 @@ def assert_ocr_exist( timeout: [int, float] = None, max_match_number: int = None, mode: str = "all", + bbox: dict = None, ): """ 断言文案存在 - :param args: - 目标字符,识别一个字符串或多个字符串,并返回其在图片中的坐标; - 如果不传参,返回图片中识别到的所有字符串。 + :param args: 目标字符,识别一个字符串或多个字符串。 :param picture_abspath: 要识别的图片路径,如果不传默认截取全屏识别。 :param similarity: 匹配度。 :param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。 @@ -413,10 +413,54 @@ def assert_ocr_exist( :param timeout: 最大匹配超时,单位秒 :param max_match_number: 最大匹配次数 :param mode: "all" or "any",all 表示识别所有目标字符,any 表示识别任意一个目标字符,默认值为 all + :param bbox: + 接收一个字典,包含一个区域,在区域内进行识别,用于干扰较大时提升OCR识别精准度 + 字典字段: + start_x: 开始 x 坐标(左上角) + start_y: 开始 y 坐标(左上角) + w: 宽度 + h: 高度 + end_x: 结束 x 坐标(右下角) + end_y: 结束 y 坐标(右下角) + 注意 : end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组 + 示例: + {start_x=0, start_y=0, w=100, h=100} + {start_x=0, start_y=0, end_x=100, end_y=100} """ + + if len(args) == 0: + raise ValueError("缺少 ocr 断言关键字") + pic = None if picture_abspath is not None: pic = picture_abspath + ".png" + + resolution = MouseKey.screen_size() + if bbox is not None: + start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None + start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None + w = bbox.get("w") if bbox.get("w") is not None else None + h = bbox.get("h") if bbox.get("h") is not None else None + end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None + end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None + + if start_x is None or start_y is None: + raise ValueError("缺失 start_x 或 start_y 坐标") + + 
wh_provided = w is not None and h is not None + end_xy_provided = end_x is not None and end_y is not None + + if not (wh_provided ^ end_xy_provided): + raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组") + + if end_xy_provided: + w = end_x - start_x + h = end_y - start_y + picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h) + pic = picture_abspath + ".png" + + resolution = f"{start_x, start_y} -> {w, h}" + res = OCR.ocr( *args, picture_abspath=pic, @@ -430,7 +474,10 @@ def assert_ocr_exist( ) if res is False: raise AssertionError( - (f"通过OCR未识别到:{args}", f"{pic if pic else GlobalConfig.SCREEN_CACHE}") + ( + f"通过OCR在范围[{resolution}]未识别到:{args}", + f"{pic if pic else GlobalConfig.SCREEN_CACHE}", + ) ) if isinstance(res, tuple): pass @@ -440,14 +487,14 @@ def assert_ocr_exist( res = filter(lambda x: x[1] is False, res.items()) raise AssertionError( ( - f"通过OCR未识别到:{dict(res)}", + f"通过OCR在范围[{resolution}]未识别到:{dict(res)}", f"{pic if pic else GlobalConfig.SCREEN_CACHE}", ) ) elif mode == "any" and len(res) == list(res.values()).count(False): raise AssertionError( ( - f"通过OCR未识别到:{args}中的任意一个", + f"通过OCR在范围[{resolution}]未识别到:{args}中的任意一个", f"{pic if pic else GlobalConfig.SCREEN_CACHE}", ) ) @@ -463,11 +510,67 @@ def assert_ocr_not_exist( pause: [int, float] = None, timeout: [int, float] = None, max_match_number: int = None, + bbox: dict = None, ): - """断言文案不存在""" + """ + 断言文案不存在 + :param args: 目标字符,识别一个字符串或多个字符串。 + :param picture_abspath: 要识别的图片路径,如果不传默认截取全屏识别。 + :param similarity: 匹配度。 + :param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。 + :param lang: `ch`, `en`, `fr`, `german`, `korean`, `japan` + :param network_retry: 连接服务器重试次数 + :param pause: 重试间隔时间,单位秒 + :param timeout: 最大匹配超时,单位秒 + :param max_match_number: 最大匹配次数 + :param bbox: + 接收一个字典,包含一个区域,在区域内进行识别,用于干扰较大时提升OCR识别精准度 + 字典字段: + start_x: 开始 x 坐标(左上角) + start_y: 开始 y 坐标(左上角) + w: 宽度 + h: 高度 + end_x: 结束 x 坐标(右下角) + end_y: 结束 y 坐标(右下角) + 注意 : end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组 + 
示例: + {start_x=0, start_y=0, w=100, h=100} + {start_x=0, start_y=0, end_x=100, end_y=100} + """ + + if len(args) == 0: + raise ValueError("缺少 ocr 断言关键字") + pic = None if picture_abspath is not None: pic = picture_abspath + ".png" + + resolution = MouseKey.screen_size() + if bbox is not None: + start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None + start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None + w = bbox.get("w") if bbox.get("w") is not None else None + h = bbox.get("h") if bbox.get("h") is not None else None + end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None + end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None + + if start_x is None or start_y is None: + raise ValueError("缺失 start_x 或 start_y 坐标") + + wh_provided = w is not None and h is not None + end_xy_provided = end_x is not None and end_y is not None + + if not (wh_provided ^ end_xy_provided): + raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组") + + if end_xy_provided: + w = end_x - start_x + h = end_y - start_y + picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h) + pic = picture_abspath + ".png" + + resolution = f"{start_x, start_y} -> {w, h}" + res = OCR.ocr( *args, picture_abspath=pic, @@ -484,15 +587,18 @@ def assert_ocr_not_exist( elif isinstance(res, tuple): raise AssertionError( ( - f"通过ocr识别到不应存在的文案 {res}", + f"通过ocr在范围[{resolution}]识别到不应存在的文案 {res}", f"{pic if pic else GlobalConfig.SCREEN_CACHE}", ) ) - elif isinstance(res, dict) and True in res.values(): - res = filter(lambda x: x[1] is not False, res.items()) - raise AssertionError( - ( - f"通过OCR识别到不应存在的文案:{dict(res)}", - f"{pic if pic else GlobalConfig.SCREEN_CACHE}", + elif isinstance(res, dict): + if all(value is False for value in res.values()): + pass + else: + res = filter(lambda x: x[1] is not False, res.items()) + raise AssertionError( + ( + f"通过OCR在范围[{resolution}]识别到不应存在的文案:{dict(res)}", + f"{pic if pic else 
GlobalConfig.SCREEN_CACHE}", + ) ) - ) diff --git a/src/ocr_utils.py b/src/ocr_utils.py index 122ac3b6..4e4b8caa 100644 --- a/src/ocr_utils.py +++ b/src/ocr_utils.py @@ -41,17 +41,17 @@ def ocr(cls, *args, **kwargs): @classmethod def ocr_remote( - cls, - target_strings: tuple = None, - picture_abspath: str = None, - similarity: [int, float] = 0.6, - return_default: bool = False, - return_first: bool = False, - lang: str = "ch", - network_retry: int = None, - pause: [int, float] = None, - timeout: [int, float] = None, - max_match_number: int = None, + cls, + target_strings: tuple = None, + picture_abspath: str = None, + similarity: [int, float] = 0.6, + return_default: bool = False, + return_first: bool = False, + lang: str = "ch", + network_retry: int = None, + pause: [int, float] = None, + timeout: [int, float] = None, + max_match_number: int = None, ): servers = cls.ocr_servers while servers: @@ -96,6 +96,7 @@ def _check_xy(cls): @classmethod def click(cls): from src.mouse_key import MouseKey + cls._check_xy() MouseKey.click(cls.x, cls.y) return cls @@ -103,6 +104,7 @@ def click(cls): @classmethod def right_click(cls): from src.mouse_key import MouseKey + cls._check_xy() MouseKey.right_click(cls.x, cls.y) return cls @@ -110,6 +112,7 @@ def right_click(cls): @classmethod def double_click(cls): from src.mouse_key import MouseKey + cls._check_xy() MouseKey.double_click(cls.x, cls.y) return cls @@ -126,41 +129,51 @@ def all_result(cls): @classmethod def ocr_find_by_range(cls, text, x1=None, x2=None, y1=None, y2=None): """ - OCR在界面中识别到多个关键词时,通过区域筛选出对应关键词并返回坐标 - :param text: 页面查找关键词 - :param x1: x坐标开始范围 - :param x2: x坐标结束范围 - :param y1: y坐标开始范围 - :param y2: y坐标结束范围 + OCR在当前界面中识别到多个关键词时,通过区域筛选出对应关键词并返回坐标 + :param text: 页面范围内查找关键词,可自由使用以下参数划定查找区域 + :param x1: x坐标开始范围,有效区域为大于 x1 区域 + :param x2: x坐标结束范围,有效区域为小于 x2 区域 + :param y1: y坐标开始范围,有效区域为大于 y1 区域 + :param y2: y坐标结束范围,有效区域为小于 y2 区域 :return: 坐标元组 (x, y) - 注意:需要特定区域内只有一组OCR关键词,若任有多组请增加精度,否则默认返回第一组符合条件的关键词坐标 + 
注意:该方法设计是为了筛选出唯一坐标,所以需要特定区域内只有一组OCR关键词,若仍有多组数据会直接报错,请增加精度 以默认分辨率 1920*1080 为例,多种示例情况如下: - 示例1(识别左半屏幕关键字):ocr_find_by_range(x1=960) + 示例1(识别左半屏幕关键字):ocr_find_by_range(x2=960) 示例2(识别下半屏幕关键字):ocr_find_by_range(y1=540) - 示例3(识别左半屏幕-上半屏关键字):ocr_find_by_range(x1=960, y1=540) - 示例4(识别特定区域 :100*900-200*950 内关键字):ocr_find_by_range(x1=100, x2=200, y1=900, y2=950) + 示例3(识别右下半屏幕关键字):ocr_find_by_range(x1=960, y1=540) + 示例4(识别特定区域 :100*900-200*950 内关键字):ocr_find_by_range(x1=100, y1=900, x2=200, y2=950) """ - defaults = { - 'x1': 0, - 'x2': 1920, - 'y1': 0, - 'y2': 1080 - } - - x1 = x1 if x1 is not None else defaults['x1'] - x2 = x2 if x2 is not None else defaults['x2'] - y1 = y1 if y1 is not None else defaults['y1'] - y2 = y2 if y2 is not None else defaults['y2'] + defaults = {"x1": 0, "x2": 1920, "y1": 0, "y2": 1080} + + x1 = x1 if x1 is not None else defaults["x1"] + x2 = x2 if x2 is not None else defaults["x2"] + y1 = y1 if y1 is not None else defaults["y1"] + y2 = y2 if y2 is not None else defaults["y2"] + if x1 > x2 or y1 > y2: + raise ValueError("x1 > x2 or y1 > y2") + + results = [] ocr_return = cls.ocr(text) if isinstance(ocr_return, dict): - for key, value in ocr_return.items(): + for _, value in ocr_return.items(): if x1 <= value[0] <= x2 and y1 <= value[1] <= y2: - return value - return ocr_return - - -if __name__ == '__main__': - OCRUtils.ocrx().click() + results.append(value) + if len(results) == 0: + raise ValueError( + f"范围内[{x1, y1} - {x2, y2}]未识别到关键词[{text}],请增加识别精度,识别结果为:{results}" + ) + elif len(results) == 1: + return results[0] + else: + raise ValueError( + f"范围内[{x1, y1} - {x2, y2}]识别到多组关键词[{text}],请增加识别精度,识别结果为:{results}" + ) + if x1 <= ocr_return[0] <= x2 and y1 <= ocr_return[1] <= y2: + return ocr_return + else: + raise ValueError( + f"范围内[{x1, y1} - {x2, y2}]识别不到关键词,请增加识别精度,识别结果为:{ocr_return}" + )