Merge pull request #96 from Marszzz1116/master

1、修复 assert_ocr_not_exist 断言 Bug；2、扩展 assert_ocr_exist 与 assert_ocr_not_exist 断言；3、优化 OCR 识别范围筛选方法
linuxdeepin · Aug 2, 2024 · f35bb30 · f35bb30
2 parents 9b96e97 + 6835de3
commit f35bb30
Show file tree

Hide file tree

Showing 2 changed files with 173 additions and 54 deletions.
diff --git a/src/assert_common.py b/src/assert_common.py
@@ -13,6 +13,7 @@
 
 from src.ocr_utils import OCRUtils as OCR
 from src.image_utils import ImageUtils
+from src.mouse_key import MouseKey
 from src.filectl import FileCtl
 from src.dogtail_utils import DogtailUtils
 from src.custom_exception import TemplateElementNotFound
@@ -398,12 +399,11 @@ def assert_ocr_exist(
         timeout: [int, float] = None,
         max_match_number: int = None,
         mode: str = "all",
+        bbox: dict = None,
     ):
         """
         断言文案存在
-        :param args:
-            目标字符,识别一个字符串或多个字符串,并返回其在图片中的坐标;
-            如果不传参，返回图片中识别到的所有字符串。
+        :param args: 目标字符,识别一个字符串或多个字符串。
         :param picture_abspath: 要识别的图片路径，如果不传默认截取全屏识别。
         :param similarity: 匹配度。
         :param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。
@@ -413,10 +413,54 @@ def assert_ocr_exist(
         :param timeout: 最大匹配超时,单位秒
         :param max_match_number: 最大匹配次数
         :param mode: "all" or "any"，all 表示识别所有目标字符，any 表示识别任意一个目标字符，默认值为 all
+        :param bbox:
+            接收一个字典，包含一个区域，在区域内进行识别，用于干扰较大时提升OCR识别精准度
+            字典字段:
+                start_x: 开始 x 坐标（左上角）
+                start_y: 开始 y 坐标（左上角）
+                w: 宽度
+                h: 高度
+                end_x: 结束 x 坐标（右下角）
+                end_y: 结束 y 坐标（右下角）
+                注意 ： end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组
+            示例：
+                {"start_x": 0, "start_y": 0, "w": 100, "h": 100}
+                {"start_x": 0, "start_y": 0, "end_x": 100, "end_y": 100}
         """
+
+        if len(args) == 0:
+            raise ValueError("缺少 ocr 断言关键字")
+
         pic = None
         if picture_abspath is not None:
             pic = picture_abspath + ".png"
+
+        resolution = MouseKey.screen_size()
+        if bbox is not None:
+            start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None
+            start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None
+            w = bbox.get("w") if bbox.get("w") is not None else None
+            h = bbox.get("h") if bbox.get("h") is not None else None
+            end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None
+            end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None
+
+            if start_x is None or start_y is None:
+                raise ValueError("缺失 start_x 或 start_y 坐标")
+
+            wh_provided = w is not None and h is not None
+            end_xy_provided = end_x is not None and end_y is not None
+
+            if not (wh_provided ^ end_xy_provided):
+                raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组")
+
+            if end_xy_provided:
+                w = end_x - start_x
+                h = end_y - start_y
+            picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h)
+            pic = picture_abspath + ".png"
+
+            resolution = f"{start_x, start_y} -> {w, h}"
+
         res = OCR.ocr(
             *args,
             picture_abspath=pic,
@@ -430,7 +474,10 @@ def assert_ocr_exist(
         )
         if res is False:
             raise AssertionError(
-                (f"通过OCR未识别到：{args}", f"{pic if pic else GlobalConfig.SCREEN_CACHE}")
+                (
+                    f"通过OCR在范围[{resolution}]未识别到：{args}",
+                    f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
+                )
             )
         if isinstance(res, tuple):
             pass
@@ -440,14 +487,14 @@ def assert_ocr_exist(
                 res = filter(lambda x: x[1] is False, res.items())
                 raise AssertionError(
                     (
-                        f"通过OCR未识别到：{dict(res)}",
+                        f"通过OCR在范围[{resolution}]未识别到：{dict(res)}",
                         f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
                     )
                 )
             elif mode == "any" and len(res) == list(res.values()).count(False):
                 raise AssertionError(
                     (
-                        f"通过OCR未识别到：{args}中的任意一个",
+                        f"通过OCR在范围[{resolution}]未识别到：{args}中的任意一个",
                         f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
                     )
                 )
@@ -463,11 +510,67 @@ def assert_ocr_not_exist(
         pause: [int, float] = None,
         timeout: [int, float] = None,
         max_match_number: int = None,
+        bbox: dict = None,
     ):
-        """断言文案不存在"""
+        """
+        断言文案不存在
+        :param args: 目标字符,识别一个字符串或多个字符串。
+        :param picture_abspath: 要识别的图片路径，如果不传默认截取全屏识别。
+        :param similarity: 匹配度。
+        :param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。
+        :param lang: `ch`, `en`, `fr`, `german`, `korean`, `japan`
+        :param network_retry: 连接服务器重试次数
+        :param pause: 重试间隔时间,单位秒
+        :param timeout: 最大匹配超时,单位秒
+        :param max_match_number: 最大匹配次数
+        :param bbox:
+            接收一个字典，包含一个区域，在区域内进行识别，用于干扰较大时提升OCR识别精准度
+            字典字段:
+                start_x: 开始 x 坐标（左上角）
+                start_y: 开始 y 坐标（左上角）
+                w: 宽度
+                h: 高度
+                end_x: 结束 x 坐标（右下角）
+                end_y: 结束 y 坐标（右下角）
+                注意 ： end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组
+            示例：
+                {"start_x": 0, "start_y": 0, "w": 100, "h": 100}
+                {"start_x": 0, "start_y": 0, "end_x": 100, "end_y": 100}
+        """
+
+        if len(args) == 0:
+            raise ValueError("缺少 ocr 断言关键字")
+
         pic = None
         if picture_abspath is not None:
             pic = picture_abspath + ".png"
+
+        resolution = MouseKey.screen_size()
+        if bbox is not None:
+            start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None
+            start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None
+            w = bbox.get("w") if bbox.get("w") is not None else None
+            h = bbox.get("h") if bbox.get("h") is not None else None
+            end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None
+            end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None
+
+            if start_x is None or start_y is None:
+                raise ValueError("缺失 start_x 或 start_y 坐标")
+
+            wh_provided = w is not None and h is not None
+            end_xy_provided = end_x is not None and end_y is not None
+
+            if not (wh_provided ^ end_xy_provided):
+                raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组")
+
+            if end_xy_provided:
+                w = end_x - start_x
+                h = end_y - start_y
+            picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h)
+            pic = picture_abspath + ".png"
+
+            resolution = f"{start_x, start_y} -> {w, h}"
+
         res = OCR.ocr(
             *args,
             picture_abspath=pic,
@@ -484,15 +587,18 @@ def assert_ocr_not_exist(
         elif isinstance(res, tuple):
             raise AssertionError(
                 (
-                    f"通过ocr识别到不应存在的文案 {res}",
+                    f"通过ocr在范围[{resolution}]识别到不应存在的文案 {res}",
                     f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
                 )
             )
-        elif isinstance(res, dict) and True in res.values():
-            res = filter(lambda x: x[1] is not False, res.items())
-            raise AssertionError(
-                (
-                    f"通过OCR识别到不应存在的文案：{dict(res)}",
-                    f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
+        elif isinstance(res, dict):
+            if all(value is False for value in res.values()):
+                pass
+            else:
+                res = filter(lambda x: x[1] is not False, res.items())
+                raise AssertionError(
+                    (
+                        f"通过OCR在范围[{resolution}]识别到不应存在的文案：{dict(res)}",
+                        f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
+                    )
                 )
-            )
diff --git a/src/ocr_utils.py b/src/ocr_utils.py
@@ -41,17 +41,17 @@ def ocr(cls, *args, **kwargs):
 
     @classmethod
     def ocr_remote(
-            cls,
-            target_strings: tuple = None,
-            picture_abspath: str = None,
-            similarity: [int, float] = 0.6,
-            return_default: bool = False,
-            return_first: bool = False,
-            lang: str = "ch",
-            network_retry: int = None,
-            pause: [int, float] = None,
-            timeout: [int, float] = None,
-            max_match_number: int = None,
+        cls,
+        target_strings: tuple = None,
+        picture_abspath: str = None,
+        similarity: [int, float] = 0.6,
+        return_default: bool = False,
+        return_first: bool = False,
+        lang: str = "ch",
+        network_retry: int = None,
+        pause: [int, float] = None,
+        timeout: [int, float] = None,
+        max_match_number: int = None,
     ):
         servers = cls.ocr_servers
         while servers:
@@ -96,20 +96,23 @@ def _check_xy(cls):
     @classmethod
     def click(cls):
         from src.mouse_key import MouseKey
+
         cls._check_xy()
         MouseKey.click(cls.x, cls.y)
         return cls
 
     @classmethod
     def right_click(cls):
         from src.mouse_key import MouseKey
+
         cls._check_xy()
         MouseKey.right_click(cls.x, cls.y)
         return cls
 
     @classmethod
     def double_click(cls):
         from src.mouse_key import MouseKey
+
         cls._check_xy()
         MouseKey.double_click(cls.x, cls.y)
         return cls
@@ -126,41 +129,51 @@ def all_result(cls):
     @classmethod
     def ocr_find_by_range(cls, text, x1=None, x2=None, y1=None, y2=None):
         """
-        OCR在界面中识别到多个关键词时，通过区域筛选出对应关键词并返回坐标
-        :param text: 页面查找关键词
-        :param x1: x坐标开始范围
-        :param x2: x坐标结束范围
-        :param y1: y坐标开始范围
-        :param y2: y坐标结束范围
+        OCR在当前界面中识别到多个关键词时，通过区域筛选出对应关键词并返回坐标
+        :param text: 页面范围内查找关键词，可自由使用以下参数划定查找区域
+        :param x1: x坐标开始范围，有效区域为大于 x1 区域
+        :param x2: x坐标结束范围，有效区域为小于 x2 区域
+        :param y1: y坐标开始范围，有效区域为大于 y1 区域
+        :param y2: y坐标结束范围，有效区域为小于 y2 区域
         :return: 坐标元组 (x, y)
 
-        注意：需要特定区域内只有一组OCR关键词，若任有多组请增加精度，否则默认返回第一组符合条件的关键词坐标
+        注意：该方法设计是为了筛选出唯一坐标，所以需要特定区域内只有一组OCR关键词，若仍有多组数据会直接报错，请增加精度
 
         以默认分辨率 1920*1080 为例，多种示例情况如下：
-        示例1（识别左半屏幕关键字）：ocr_find_by_range(x1=960)
+        示例1（识别左半屏幕关键字）：ocr_find_by_range(x2=960)
         示例2（识别下半屏幕关键字）：ocr_find_by_range(y1=540)
-        示例3（识别左半屏幕-上半屏关键字）：ocr_find_by_range(x1=960, y1=540)
-        示例4（识别特定区域 ：100*900-200*950 内关键字）：ocr_find_by_range(x1=100, x2=200, y1=900, y2=950)
+        示例3（识别右下半屏幕关键字）：ocr_find_by_range(x1=960, y1=540)
+        示例4（识别特定区域 ：100*900-200*950 内关键字）：ocr_find_by_range(x1=100, y1=900, x2=200, y2=950)
         """
-        defaults = {
-            'x1': 0,
-            'x2': 1920,
-            'y1': 0,
-            'y2': 1080
-        }
-
-        x1 = x1 if x1 is not None else defaults['x1']
-        x2 = x2 if x2 is not None else defaults['x2']
-        y1 = y1 if y1 is not None else defaults['y1']
-        y2 = y2 if y2 is not None else defaults['y2']
+        defaults = {"x1": 0, "x2": 1920, "y1": 0, "y2": 1080}
+
+        x1 = x1 if x1 is not None else defaults["x1"]
+        x2 = x2 if x2 is not None else defaults["x2"]
+        y1 = y1 if y1 is not None else defaults["y1"]
+        y2 = y2 if y2 is not None else defaults["y2"]
 
+        if x1 > x2 or y1 > y2:
+            raise ValueError("x1 > x2 or y1 > y2")
+
+        results = []
         ocr_return = cls.ocr(text)
         if isinstance(ocr_return, dict):
-            for key, value in ocr_return.items():
+            for _, value in ocr_return.items():
                 if x1 <= value[0] <= x2 and y1 <= value[1] <= y2:
-                    return value
-        return ocr_return
-
-
-if __name__ == '__main__':
-    OCRUtils.ocrx().click()
+                    results.append(value)
+            if len(results) == 0:
+                raise ValueError(
+                    f"范围内[{x1, y1} - {x2, y2}]未识别到关键词[{text}]，请增加识别精度，识别结果为：{results}"
+                )
+            elif len(results) == 1:
+                return results[0]
+            else:
+                raise ValueError(
+                    f"范围内[{x1, y1} - {x2, y2}]识别到多组关键词[{text}]，请增加识别精度，识别结果为：{results}"
+                )
+        if x1 <= ocr_return[0] <= x2 and y1 <= ocr_return[1] <= y2:
+            return ocr_return
+        else:
+            raise ValueError(
+                f"范围内[{x1, y1} - {x2, y2}]识别不到关键词，请增加识别精度，识别结果为：{ocr_return}"
+            )