From d33f4ecfdc680b177654c96e7c1704b6488875e3 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 1 Aug 2019 19:22:27 +0200 Subject: [PATCH 01/16] ocrd_utils: require PIL, numpy --- ocrd/requirements.txt | 1 - ocrd_modelfactory/requirements.txt | 1 - ocrd_utils/requirements.txt | 2 ++ ocrd_utils/setup.py | 3 +++ 4 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 ocrd_utils/requirements.txt diff --git a/ocrd/requirements.txt b/ocrd/requirements.txt index e94643ce38..4efaa41fbc 100644 --- a/ocrd/requirements.txt +++ b/ocrd/requirements.txt @@ -4,7 +4,6 @@ click >=7 requests lxml Pillow >= 5.3.0 -numpy opencv-python-headless Flask jsonschema diff --git a/ocrd_modelfactory/requirements.txt b/ocrd_modelfactory/requirements.txt index 0b6c4da520..ab90481d5d 100644 --- a/ocrd_modelfactory/requirements.txt +++ b/ocrd_modelfactory/requirements.txt @@ -1,2 +1 @@ lxml -Pillow >= 5.3.0 diff --git a/ocrd_utils/requirements.txt b/ocrd_utils/requirements.txt new file mode 100644 index 0000000000..b467462214 --- /dev/null +++ b/ocrd_utils/requirements.txt @@ -0,0 +1,2 @@ +Pillow >= 5.3.0 +numpy >= 1.18 diff --git a/ocrd_utils/setup.py b/ocrd_utils/setup.py index 4a8a1b2e66..3f4939704f 100644 --- a/ocrd_utils/setup.py +++ b/ocrd_utils/setup.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- from setuptools import setup +install_requires = open('requirements.txt').read().split('\n') + setup( name='ocrd_utils', version='1.0.0b11', @@ -12,6 +14,7 @@ url='https://github.com/OCR-D/core', license='Apache License 2.0', packages=['ocrd_utils'], + install_requires=install_requires, package_data={'': ['*.json', '*.yml', '*.xml']}, keywords=['OCR', 'OCR-D'] ) From 4192ba5781682319e60c0c175bcac26c91c9ab47 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 1 Aug 2019 19:45:25 +0200 Subject: [PATCH 02/16] utils: import the utility functions from ocrd_tesserocr --- ocrd_utils/ocrd_utils/__init__.py | 208 +++++++++++++++++++++++++++++- tests/test_utils.py | 16 ++- 2 files changed, 212 insertions(+), 12 deletions(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index cce7166632..63ec871b47 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -5,29 +5,48 @@ The functions have the syntax X_from_Y, where X/Y can be -points a string encoding a polygon: "0,0 100,0 100,100, 0,100" -polygon an array of x-y-tuples of a polygon: [[0,0], [100,0], [100,100], [0,100]] -xywh a dict with keys for x, y, width and height: {'x': 0, 'y': 0, 'w': 100, 'h': 100} -points is for PAGE +* bbox is a 4-tuple of x0, y0, x1, y1 of the bounding box +* points a string encoding a polygon: "0,0 100,0 100,100, 0,100". points is for PAGE +* polygon an array of x-y-tuples of a polygon: [[0,0], [100,0], [100,100], [0,100]] +* xywh a dict with keys for x, y, width and height: {'x': 0, 'y': 0, 'w': 100, 'h': 100} polygon is what opencv2 expects xywh is what tesserocr expects/produces. """ +import sys +import numpy as np + +from PIL import Image, ImageStat, ImageDraw + __all__ = [ 'abspath', + 'bbox_from_points', + 'bbox_from_xywh', + 'bbox_from_polygon', + 'coordinates_of_segment', 'concat_padded', + 'crop_image', 'getLogger', 'is_local_filename', 'is_string', 'logging', - 'points_from_xywh', + 'membername', + 'points_from_bbox', + 'points_from_polygon', 'points_from_x0y0x1y1', + 'points_from_xywh', 'points_from_y0x0y1x1', + 'polygon_from_bbox', 'polygon_from_points', + 'polygon_from_x0y0x1y1', + 'polygon_from_xywh', + 'polygon_mask', + 'rotate_coordinates', 'safe_filename', 'unzip_file_to_dir', + 'xywh_from_bbox', 'xywh_from_points', 'VERSION', @@ -54,6 +73,65 @@ def abspath(url): url = url[len('file://'):] return os_abspath(url) +def bbox_from_points(points): + """Construct a numeric list representing a bounding box from polygon coordinates in page representation.""" + xys = [[int(p) for p in pair.split(',')] for pair in points.split(' ')] + return bbox_from_polygon(xys) + +def bbox_from_polygon(polygon): + """Construct a numeric list representing a bounding box from polygon coordinates in numeric list representation.""" + minx = sys.maxsize + miny = sys.maxsize + maxx = 0 + maxy = 0 + for xy in polygon: + if xy[0] < minx: + minx = xy[0] + if xy[0] > maxx: + maxx = xy[0] + if xy[1] < miny: + miny = xy[1] + if xy[1] > maxy: + maxy = xy[1] + return minx, miny, maxx, maxy + +def bbox_from_xywh(xywh): + """Convert a bounding box from a numeric dict to a numeric list representation.""" + return ( + xywh['x'], + xywh['y'], + xywh['x'] + xywh['w'], + xywh['y'] + xywh['h'] + ) + +def xywh_from_polygon(polygon): + """Construct a numeric dict representing a bounding box from polygon coordinates in numeric list representation.""" + return xywh_from_bbox(*bbox_from_polygon(polygon)) + +def coordinates_of_segment(segment, parent_image, parent_xywh): + """Extract the relative coordinates polygon of a PAGE segment element. + + Given a Region / TextLine / Word / Glyph `segment` and + the PIL.Image of its parent Page / Region / TextLine / Word + along with its bounding box, calculate the relative coordinates + of the segment within the image. That is, shift all points from + the offset of the parent, and (in case the parent was rotated,) + rotate all points with the center of the image as origin. + + Return the rounded numpy array of the resulting polygon. + """ + # get polygon: + polygon = np.array(polygon_from_points(segment.get_Coords().points)) + # offset correction (shift coordinates to base of segment): + polygon -= np.array([parent_xywh['x'], parent_xywh['y']]) + # angle correction (rotate coordinates if image has been rotated): + if 'angle' in parent_xywh: + polygon = rotate_coordinates( + polygon, parent_xywh['angle'], + orig=np.array([0.5 * parent_image.width, + 0.5 * parent_image.height])) + return np.round(polygon).astype(np.int32) + def concat_padded(base, *args): """ Concatenate string and zero-padded 4 digit number @@ -66,6 +144,53 @@ def concat_padded(base, *args): ret = "%s_%04i" % (ret, n + 1) return ret +def crop_image(image, box=None): + """"Crop an image to a rectangle, filling with background. + + Given a PIL.Image `image` and a list `box` of the bounding + rectangle relative to the image, crop at the box coordinates, + filling everything outside `image` with the background. + (This covers the case where `box` indexes are negative or + larger than `image` width/height. PIL.Image.crop would fill + with black.) Since `image` is not necessarily binarized yet, + determine the background from the median color (instead of + white). + + Return a new PIL.Image. + """ + # todo: perhaps we should issue a warning if we encounter this + # (It should be invalid in PAGE-XML to extend beyond parents.) + if not box: + box = (0, 0, image.width, image.height) + xywh = xywh_from_bbox(*box) + background = ImageStat.Stat(image).median[0] + new_image = Image.new(image.mode, (xywh['w'], xywh['h']), + background) # or 'white' + new_image.paste(image, (-xywh['x'], -xywh['y'])) + return new_image + +def image_from_polygon(image, polygon): + """"Mask an image with a polygon. + + Given a PIL.Image `image` and a numpy array `polygon` + of relative coordinates into the image, put everything + outside the polygon hull to the background. Since `image` + is not necessarily binarized yet, determine the background + from the median color (instead of white). + + Return a new PIL.Image. + """ + mask = polygon_mask(image, polygon) + # create a background image from its median color + # (in case it has not been binarized yet): + # array = np.asarray(image) + # background = np.median(array, axis=[0, 1], keepdims=True) + # array = np.broadcast_to(background.astype(np.uint8), array.shape) + background = ImageStat.Stat(image).median[0] + new_image = Image.new('L', image.size, background) + new_image.paste(image, mask=mask) + return new_image + def is_local_filename(url): """ Whether a url is a local filename. @@ -82,6 +207,19 @@ def is_string(val): """ return isinstance(val, str) +def membername(class_, val): + """Convert a member variable/constant into a member name string.""" + return next((k for k, v in class_.__dict__.items() if v == val), str(val)) + +def points_from_bbox(minx, miny, maxx, maxy): + """Construct polygon coordinates in page representation from a numeric list representing a bounding box.""" + return "%i,%i %i,%i %i,%i %i,%i" % ( + minx, miny, maxx, miny, maxx, maxy, minx, maxy) + +def points_from_polygon(polygon): + """Convert polygon coordinates from a numeric list representation to a page representation.""" + return " ".join("%i,%i" % (x, y) for x, y in polygon) + def points_from_xywh(box): """ Constructs a polygon representation from a rectangle described as a dict with keys x, y, w, h. @@ -125,6 +263,10 @@ def points_from_x0y0x1y1(xyxy): x0, y1 ) +def polygon_from_bbox(minx, miny, maxx, maxy): + """Construct polygon coordinates in numeric list representation from a numeric list representing a bounding box.""" + return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]] + def polygon_from_points(points): """ Constructs a numpy-compatible polygon from a page representation. @@ -135,6 +277,53 @@ def polygon_from_points(points): polygon.append([float(x_y[0]), float(x_y[1])]) return polygon +def polygon_from_x0y0x1y1(x0y0x1y1): + """Construct polygon coordinates in numeric list representation from a string list representing a bounding box.""" + minx = int(x0y0x1y1[0]) + miny = int(x0y0x1y1[1]) + maxx = int(x0y0x1y1[2]) + maxy = int(x0y0x1y1[3]) + return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]] + +def polygon_from_xywh(xywh): + """Construct polygon coordinates in numeric list representation from numeric dict representing a bounding box.""" + return polygon_from_bbox(*bbox_from_xywh(xywh)) + +def polygon_mask(image, coordinates): + """"Create a mask image of a polygon. + + Given a PIL.Image `image` (merely for dimensions), and + a numpy array `polygon` of relative coordinates into the image, + create a new image of the same size with black background, and + fill everything inside the polygon hull with white. + + Return the new PIL.Image. + """ + mask = Image.new('L', image.size, 0) + if isinstance(coordinates, np.ndarray): + coordinates = list(map(tuple, coordinates)) + ImageDraw.Draw(mask).polygon(coordinates, outline=1, fill=255) + return mask + +def rotate_coordinates(polygon, angle, orig=np.array([0, 0])): + """Apply a passive rotation transformation to the given coordinates. + + Given a numpy array `polygon` of points and a rotation `angle`, + as well as a numpy array `orig` of the center of rotation, + calculate the coordinate transform corresponding to the rotation + of the underlying image by `angle` degrees at `center` by + applying translation to the center, inverse rotation, + and translation from the center. + + Return a numpy array of the resulting polygon. + """ + angle = np.deg2rad(angle) + cos = np.cos(angle) + sin = np.sin(angle) + # active rotation: [[cos, -sin], [sin, cos]] + # passive rotation: [[cos, sin], [-sin, cos]] (inverse) + return orig + np.dot(polygon - orig, np.array([[cos, sin], [-sin, cos]]).transpose()) + def safe_filename(url): """ Sanitize input to be safely used as the basename of a local file. @@ -151,6 +340,15 @@ def unzip_file_to_dir(path_to_zip, output_directory): z.extractall(output_directory) z.close() +def xywh_from_bbox(minx, miny, maxx, maxy): + """Convert a bounding box from a numeric list to a numeric dict representation.""" + return { + 'x': minx, + 'y': miny, + 'w': maxx - minx, + 'h': maxy - miny, + } + def xywh_from_points(points): """ Constructs an dict representing a rectangle with keys x, y, w, h diff --git a/tests/test_utils.py b/tests/test_utils.py index c0bfd28221..deef6e3d65 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,13 +1,14 @@ from tests.base import TestCase, main from ocrd_utils import ( abspath, + bbox_from_points, + concat_padded, is_local_filename, - points_from_xywh, is_string, - concat_padded, points_from_x0y0x1y1, - xywh_from_points, + points_from_xywh, polygon_from_points, + xywh_from_points, ) from ocrd_models.utils import xmllint_format @@ -24,15 +25,16 @@ def test_is_local_filename(self): def test_points_from_xywh(self): self.assertEqual( points_from_xywh({'x': 100, 'y': 100, 'w': 100, 'h': 100}), - '100,100 200,100 200,200 100,200' - ) + '100,100 200,100 200,200 100,200') def test_points_from_x0y0x1y1(self): self.assertEqual( points_from_x0y0x1y1([100, 100, 200, 200]), - '100,100 200,100 200,200 100,200' - ) + '100,100 200,100 200,200 100,200') + def test_bbox_from_points(self): + self.assertEqual( + bbox_from_points('100,100 200,100 200,200 100,200'), (100, 100, 200, 200)) def test_xywh_from_points(self): self.assertEqual( From 1cdc3b60da6b8a9264d7295d2ed8598f933b3763 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 1 Aug 2019 19:49:41 +0200 Subject: [PATCH 03/16] relax numpy version requirement --- ocrd_utils/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_utils/requirements.txt b/ocrd_utils/requirements.txt index b467462214..b4ccf8ef14 100644 --- a/ocrd_utils/requirements.txt +++ b/ocrd_utils/requirements.txt @@ -1,2 +1,2 @@ Pillow >= 5.3.0 -numpy >= 1.18 +numpy >= 1.17.0 From cb21189bfae893fcbd3e359b19ba63b76f3c3e08 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 2 Aug 2019 17:17:13 +0200 Subject: [PATCH 04/16] utils: more tests --- tests/test_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index deef6e3d65..1a09044ff6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,6 +2,7 @@ from ocrd_utils import ( abspath, bbox_from_points, + bbox_from_xywh, concat_padded, is_local_filename, is_string, @@ -9,6 +10,7 @@ points_from_xywh, polygon_from_points, xywh_from_points, + xywh_from_polygon, ) from ocrd_models.utils import xmllint_format @@ -36,6 +38,16 @@ def test_bbox_from_points(self): self.assertEqual( bbox_from_points('100,100 200,100 200,200 100,200'), (100, 100, 200, 200)) + def test_bbox_from_xywh(self): + self.assertEqual( + bbox_from_xywh({'x': 100, 'y': 100, 'w': 100, 'h': 100}), + (100, 100, 200, 200)) + + def test_xywh_from_polygon(self): + self.assertEqual( + xywh_from_polygon([[100, 100], [200, 100], [200, 200], [100, 200]]), + {'x': 100, 'y': 100, 'w': 100, 'h': 100}) + def test_xywh_from_points(self): self.assertEqual( xywh_from_points('100,100 200,100 200,200 100,200'), From e6ec6a4cd34f5b3f9d281afa8f22e5ca774bbf7a Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 7 Aug 2019 12:57:00 +0200 Subject: [PATCH 05/16] Apply suggestions by @bertsky Co-Authored-By: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> --- ocrd_utils/ocrd_utils/__init__.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 63ec871b47..547245b4dd 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -2,17 +2,31 @@ Utility methods usable in various circumstances. * xywh_from_points, points_from_xywh, polygon_from_points +* coordinates_of_segment, coordinates_for_segment, rotate_coordinates +These functions convert polygon outlines for PAGE elements on all hierarchy levels (page, region, line, word, glyph) between relative coordinates w.r.t. parent segment and absolute coordinates w.r.t. the top-level (source) image. This includes rotation and offset correction. + +* polygon_mask, image_from_polygon, crop_image + +These functions combine PIL.Image with polygons or bboxes. The functions have the syntax X_from_Y, where X/Y can be -* bbox is a 4-tuple of x0, y0, x1, y1 of the bounding box -* points a string encoding a polygon: "0,0 100,0 100,100, 0,100". points is for PAGE -* polygon an array of x-y-tuples of a polygon: [[0,0], [100,0], [100,100], [0,100]] + * `x0y0x1y1` is a 4-list of strings x0, y0, x1, y1 of the bounding box (rectangle) + * `bbox` is a 4-tuple of integers x0, y0, x1, y1 of the bounding box (rectangle) +* points a string encoding a polygon: "0,0 100,0 100,100, 0,100" + * `polygon` is a list of 2-lists of integers x, y of points forming an (implicitly closed) polygon path: [[0,0], [100,0], [100,100], [0,100]] * xywh a dict with keys for x, y, width and height: {'x': 0, 'y': 0, 'w': 100, 'h': 100} -polygon is what opencv2 expects +polygon is what opencv2 and higher-level coordinate functions in ocrd_utils expect + +xywh and x0y0x1y1 are what tesserocr expects/produces. +points is what PAGE-XML uses. +bbox is what PIL.Image uses. +* is_local_filename, safe_filename, abspath + +* is_string, membername, concat_padded -xywh is what tesserocr expects/produces. +* MIMETYPE_PAGE, EXT_TO_MIME, VERSION """ import sys @@ -222,7 +236,7 @@ def points_from_polygon(polygon): def points_from_xywh(box): """ - Constructs a polygon representation from a rectangle described as a dict with keys x, y, w, h. + Construct polygon coordinates in page representation from numeric dict representing a bounding box. """ x, y, w, h = box['x'], box['y'], box['w'], box['h'] # tesseract uses a different region representation format @@ -269,7 +283,7 @@ def polygon_from_bbox(minx, miny, maxx, maxy): def polygon_from_points(points): """ - Constructs a numpy-compatible polygon from a page representation. + Convert polygon coordinates in page representation to polygon coordinates in numeric list representation. """ polygon = [] for pair in points.split(" "): @@ -351,7 +365,7 @@ def xywh_from_bbox(minx, miny, maxx, maxy): def xywh_from_points(points): """ - Constructs an dict representing a rectangle with keys x, y, w, h + Construct a numeric dict representing a bounding box from polygon coordinates in page representation. """ xys = [[int(p) for p in pair.split(',')] for pair in points.split(' ')] minx = sys.maxsize From 5032086ce87c1193e7a36c264a5a74f9cf489300 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 7 Aug 2019 13:36:58 +0200 Subject: [PATCH 06/16] coordinates_for_segment, OCR-D/ocrd_tesserocr#68 --- ocrd_utils/ocrd_utils/__init__.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 547245b4dd..4472c0496f 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -122,6 +122,28 @@ def xywh_from_polygon(polygon): """Construct a numeric dict representing a bounding box from polygon coordinates in numeric list representation.""" return xywh_from_bbox(*bbox_from_polygon(polygon)) +def coordinates_for_segment(polygon, parent_image, parent_xywh): + """Convert a relative coordinates polygon to absolute. + + Given a numpy array `polygon` of points, and a parent PIL.Image + along with its bounding box to which the coordinates are relative, + calculate the absolute coordinates within the page. + That is, (in case the parent was rotated,) rotate all points in + opposite direction with the center of the image as origin, then + shift all points to the offset of the parent. + + Return the rounded numpy array of the resulting polygon. + """ + # angle correction (unrotate coordinates if image has been rotated): + if 'angle' in parent_xywh: + polygon = rotate_coordinates( + polygon, -parent_xywh['angle'], + orig=np.array([0.5 * parent_image.width, + 0.5 * parent_image.height])) + # offset correction (shift coordinates from base of segment): + polygon += np.array([parent_xywh['x'], parent_xywh['y']]) + return np.round(polygon).astype(np.int32) + def coordinates_of_segment(segment, parent_image, parent_xywh): """Extract the relative coordinates polygon of a PAGE segment element. From 5adc51234ccc7527c539c77664db9d0754ae8fc1 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 7 Aug 2019 13:59:36 +0200 Subject: [PATCH 07/16] utils: tests --- ocrd_utils/ocrd_utils/__init__.py | 1 + tests/test_utils.py | 36 +++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 4472c0496f..9d0b4d683a 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -39,6 +39,7 @@ 'bbox_from_points', 'bbox_from_xywh', 'bbox_from_polygon', + 'coordinates_for_segment', 'coordinates_of_segment', 'concat_padded', 'crop_image', diff --git a/tests/test_utils.py b/tests/test_utils.py index 1a09044ff6..cf2eca61b6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,14 +1,26 @@ from tests.base import TestCase, main from ocrd_utils import ( abspath, + bbox_from_points, bbox_from_xywh, + + coordinates_of_segment, + coordinates_for_segment, + concat_padded, is_local_filename, is_string, + membername, + + points_from_bbox, points_from_x0y0x1y1, points_from_xywh, + points_from_polygon, + polygon_from_points, + polygon_from_x0y0x1y1, + xywh_from_points, xywh_from_polygon, ) @@ -16,8 +28,6 @@ class TestUtils(TestCase): - # def runTest(self): - def test_abspath(self): self.assertEqual(abspath('file:///'), '/') @@ -29,6 +39,21 @@ def test_points_from_xywh(self): points_from_xywh({'x': 100, 'y': 100, 'w': 100, 'h': 100}), '100,100 200,100 200,200 100,200') + def test_points_from_bbox(self): + self.assertEqual( + points_from_bbox(100, 100, 200, 200), + '100,100 200,100 200,200 100,200') + + def test_points_from_polygon(self): + self.assertEqual( + points_from_polygon([[100, 100], [200, 100], [200, 200], [100, 200]]), + '100,100 200,100 200,200 100,200') + + def test_polygon_from_x0y0x1y1(self): + self.assertEqual( + polygon_from_x0y0x1y1([100, 100, 200, 200]), + [[100, 100], [200, 100], [200, 200], [100, 200]]) + def test_points_from_x0y0x1y1(self): self.assertEqual( points_from_x0y0x1y1([100, 100, 200, 200]), @@ -77,6 +102,13 @@ def test_xmllint(self): pretty_xml = xmllint_format(xml_str).decode('utf-8') self.assertEqual(pretty_xml, '\n' + xml_str) + def test_membername(self): + class Klazz: + def __init__(self): + self.prop = 42 + instance = Klazz() + self.assertEqual(membername(instance, 42), 'prop') + if __name__ == '__main__': main() From 16562f298f74079401dc6195ccd9a80881ac379d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 7 Aug 2019 14:01:30 +0200 Subject: [PATCH 08/16] :memo: changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142a99afa3..b4d6df71ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +Added: + + * many utility methods for image manipulation and coordinate handling, #268 + Fixed: * Regression with ocrd_page data types, #269 From 22a46248414d404664d8ec465214444b73c6b7ab Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 7 Aug 2019 17:12:54 +0200 Subject: [PATCH 09/16] export PAGE namespace in to_xml, regression from #271 --- ocrd_models/ocrd_models/ocrd_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_models/ocrd_models/ocrd_page.py b/ocrd_models/ocrd_models/ocrd_page.py index 5da8677c66..295ab76872 100644 --- a/ocrd_models/ocrd_models/ocrd_page.py +++ b/ocrd_models/ocrd_models/ocrd_page.py @@ -66,5 +66,5 @@ def to_xml(el): Serialize ``pc:PcGts`` document """ sio = StringIO() - el.export(sio, 0, name_='PcGts', namespacedef_='xmlns="%s"' % NAMESPACES['page']) + el.export(sio, 0, name_='PcGts', namespacedef_='xmlns:pc="%s"' % NAMESPACES['page']) return '\n' + sio.getvalue() From 8413ada79c8761bcde8e0b1b3a30aba9e5f51bdb Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 7 Aug 2019 17:14:13 +0200 Subject: [PATCH 10/16] move remaining tesserocr common fns to workspace --- ocrd/ocrd/workspace.py | 194 +++++++++++++++++++++++++++++- ocrd_utils/ocrd_utils/__init__.py | 21 ++-- tests/test_utils.py | 3 - 3 files changed, 207 insertions(+), 11 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 78de09231c..c88ac54a27 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -1,3 +1,4 @@ +import io import os from os.path import join @@ -6,12 +7,22 @@ import numpy as np from ocrd_models import OcrdMets, OcrdExif -from ocrd_utils import getLogger, is_local_filename, abspath +from ocrd_utils import ( + abspath, + coordinates_of_segment, + crop_image, + getLogger, + image_from_polygon, + is_local_filename, + polygon_from_points, + xywh_from_points, +) from .workspace_backup import WorkspaceBackupManager log = getLogger('ocrd.workspace') + class Workspace(): """ A workspace is a temporary directory set up for a processor. It's the @@ -194,3 +205,184 @@ def resolve_image_as_pil(self, image_url, coords=None): np.min(poly[:, 0]):np.max(poly[:, 0]) ] return Image.fromarray(region_cut) + + def image_from_page(self, page, page_id): + """Extract the Page image from the workspace. + + Given a PageType object, `page`, extract its PIL.Image from + AlternativeImage if it exists. Otherwise extract the PIL.Image + from imageFilename and crop it if a Border exists. Otherwise + just return it. + + When cropping, respect any orientation angle annotated for + the page (from page-level deskewing) by rotating the + cropped image, respectively. + + If the resulting page image is larger than the bounding box of + `page`, pass down the page's box coordinates with an offset of + half the width/height difference. + + Return the extracted image, and the absolute coordinates of + the page's bounding box / border (for passing down), and + an OcrdExif instance associated with the original image. + """ + page_image = self.resolve_image_as_pil(page.imageFilename) + page_image_info = OcrdExif(page_image) + page_xywh = {'x': 0, + 'y': 0, + 'w': page_image.width, + 'h': page_image.height} + # region angle: PAGE orientation is defined clockwise, + # whereas PIL/ndimage rotation is in mathematical direction: + page_xywh['angle'] = -(page.get_orientation() or 0) + # FIXME: remove PrintSpace here as soon as GT abides by the PAGE standard: + border = page.get_Border() or page.get_PrintSpace() + if border: + page_points = border.get_Coords().points + log.debug("Using explictly set page border '%s' for page '%s'", + page_points, page_id) + page_xywh = xywh_from_points(page_points) + + alternative_image = page.get_AlternativeImage() + if alternative_image: + # (e.g. from page-level cropping, binarization, deskewing or despeckling) + # assumes implicit cropping (i.e. page_xywh has been applied already) + log.debug("Using AlternativeImage %d (%s) for page '%s'", + len(alternative_image), alternative_image[-1].get_comments(), + page_id) + page_image = self.resolve_image_as_pil( + alternative_image[-1].get_filename()) + elif border: + # get polygon outline of page border: + page_polygon = np.array(polygon_from_points(page_points)) + # create a mask from the page polygon: + page_image = image_from_polygon(page_image, page_polygon) + # recrop into page rectangle: + page_image = crop_image(page_image, + box=(page_xywh['x'], + page_xywh['y'], + page_xywh['x'] + page_xywh['w'], + page_xywh['y'] + page_xywh['h'])) + if 'angle' in page_xywh and page_xywh['angle']: + log.info("About to rotate page '%s' by %.2f°", + page_id, page_xywh['angle']) + page_image = page_image.rotate(page_xywh['angle'], + expand=True, + #resample=Image.BILINEAR, + fillcolor='white') + # subtract offset from any increase in binary region size over source: + page_xywh['x'] -= round(0.5 * max(0, page_image.width - page_xywh['w'])) + page_xywh['y'] -= round(0.5 * max(0, page_image.height - page_xywh['h'])) + return page_image, page_xywh, page_image_info + + def image_from_segment(self, segment, parent_image, parent_xywh): + """Extract a segment image from its parent's image. + + Given a PIL.Image of the parent, `parent_image`, and + its absolute coordinates, `parent_xywh`, and a PAGE + segment (TextRegion / TextLine / Word / Glyph) object + logically contained in it, `segment`, extract its PIL.Image + from AlternativeImage (if it exists), or via cropping from + `parent_image`. + + When cropping, respect any orientation angle annotated for + the parent (from parent-level deskewing) by compensating the + segment coordinates in an inverse transformation (translation + to center, rotation, re-translation). + Also, mind the difference between annotated and actual size + of the parent (usually from deskewing), by a respective offset + into the image. Cropping uses a polygon mask (not just the + rectangle). + + When cropping, respect any orientation angle annotated for + the segment (from segment-level deskewing) by rotating the + cropped image, respectively. + + If the resulting segment image is larger than the bounding box of + `segment`, pass down the segment's box coordinates with an offset + of half the width/height difference. + + Return the extracted image, and the absolute coordinates of + the segment's bounding box (for passing down). + """ + segment_xywh = xywh_from_points(segment.get_Coords().points) + if 'orientation' in segment.__dict__: + # angle: PAGE orientation is defined clockwise, + # whereas PIL/ndimage rotation is in mathematical direction: + segment_xywh['angle'] = -(segment.get_orientation() or 0) + alternative_image = segment.get_AlternativeImage() + if alternative_image: + # (e.g. from segment-level cropping, binarization, deskewing or despeckling) + log.debug("Using AlternativeImage %d (%s) for segment '%s'", + len(alternative_image), alternative_image[-1].get_comments(), + segment.id) + segment_image = self.resolve_image_as_pil( + alternative_image[-1].get_filename()) + else: + # get polygon outline of segment relative to parent image: + segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh) + # create a mask from the segment polygon: + segment_image = image_from_polygon(parent_image, segment_polygon) + # recrop into segment rectangle: + segment_image = crop_image(segment_image, + box=(segment_xywh['x'] - parent_xywh['x'], + segment_xywh['y'] - parent_xywh['y'], + segment_xywh['x'] - parent_xywh['x'] + segment_xywh['w'], + segment_xywh['y'] - parent_xywh['y'] + segment_xywh['h'])) + # note: We should mask overlapping neighbouring segments here, + # but finding the right clipping rules can be difficult if operating + # on the raw (non-binary) image data alone: for each intersection, it + # must be decided which one of either segment or neighbour to assign, + # e.g. an ImageRegion which properly contains our TextRegion should be + # completely ignored, but an ImageRegion which is properly contained + # in our TextRegion should be completely masked, while partial overlap + # may be more difficult to decide. On the other hand, on the binary image, + # we can use connected component analysis to mask foreground areas which + # originate in the neighbouring regions. But that would introduce either + # the assumption that the input has already been binarized, or a dependency + # on some ad-hoc binarization method. Thus, it is preferable to use + # a dedicated processor for this (which produces clipped AlternativeImage + # or reduced polygon coordinates). + if 'angle' in segment_xywh and segment_xywh['angle']: + log.info("About to rotate segment '%s' by %.2f°", + segment.id, segment_xywh['angle']) + segment_image = segment_image.rotate(segment_xywh['angle'], + expand=True, + #resample=Image.BILINEAR, + fillcolor='white') + # subtract offset from any increase in binary region size over source: + segment_xywh['x'] -= round(0.5 * max(0, segment_image.width - segment_xywh['w'])) + segment_xywh['y'] -= round(0.5 * max(0, segment_image.height - segment_xywh['h'])) + return segment_image, segment_xywh + + # pylint: disable=redefined-builtin + def save_image_file(self, image, + file_id, + page_id=None, + file_grp='OCR-D-IMG', # or -BIN? + format='PNG', + force=True): + """Store and reference an image as file into the workspace. + + Given a PIL.Image `image`, and an ID `file_id` to use in METS, + store the image under the fileGrp `file_grp` and physical page + `page_id` into the workspace (in a file name based on + the `file_grp`, `file_id` and `format` extension). + + Return the (absolute) path of the created file. + """ + image_bytes = io.BytesIO() + image.save(image_bytes, format=format) + file_path = os.path.join(file_grp, + file_id + '.' + format.lower()) + out = self.add_file( + ID=file_id, + file_grp=file_grp, + pageId=page_id, + local_filename=file_path, + mimetype='image/' + format.lower(), + content=image_bytes.getvalue(), + force=force) + log.info('created file ID: %s, file_grp: %s, path: %s', + file_id, file_grp, out.local_filename) + return file_path diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 9d0b4d683a..050256db43 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -29,11 +29,6 @@ * MIMETYPE_PAGE, EXT_TO_MIME, VERSION """ -import sys -import numpy as np - -from PIL import Image, ImageStat, ImageDraw - __all__ = [ 'abspath', 'bbox_from_points', @@ -45,9 +40,13 @@ 'crop_image', 'getLogger', 'is_local_filename', + 'image_from_segment', 'is_string', 'logging', 'membername', + 'image_from_page', + 'image_from_segment', + 'image_from_polygon', 'points_from_bbox', 'points_from_polygon', 'points_from_x0y0x1y1', @@ -69,15 +68,23 @@ 'EXT_TO_MIME', ] +import io +import os +from os.path import isfile, abspath as os_abspath import re import sys -import logging -from os.path import isfile, abspath as os_abspath from zipfile import ZipFile +import numpy as np +from PIL import Image, ImageStat, ImageDraw + +import logging from .logging import getLogger from .constants import * # pylint: disable=wildcard-import +LOG = getLogger('ocrd_utils') + + def abspath(url): """ Get a full path to a file or file URL diff --git a/tests/test_utils.py b/tests/test_utils.py index cf2eca61b6..2a8ebaeb6f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,9 +5,6 @@ bbox_from_points, bbox_from_xywh, - coordinates_of_segment, - coordinates_for_segment, - concat_padded, is_local_filename, is_string, From ac95d38995d6ed167146fbbfc1305f3edc7a35df Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 20 Aug 2019 19:07:16 +0200 Subject: [PATCH 11/16] :memo: format utils pydoc --- ocrd_utils/ocrd_utils/__init__.py | 78 +++++++++++++++++-------------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 050256db43..463948e069 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -1,32 +1,45 @@ """ Utility methods usable in various circumstances. -* xywh_from_points, points_from_xywh, polygon_from_points -* coordinates_of_segment, coordinates_for_segment, rotate_coordinates +* ``xywh_from_points``, ``points_from_xywh``, ``polygon_from_points`` +* ``coordinates_of_segment``, ``coordinates_for_segment``, ``rotate_coordinates`` -These functions convert polygon outlines for PAGE elements on all hierarchy levels (page, region, line, word, glyph) between relative coordinates w.r.t. parent segment and absolute coordinates w.r.t. the top-level (source) image. This includes rotation and offset correction. +These functions convert polygon outlines for PAGE elements on all hierarchy +levels (page, region, line, word, glyph) between relative coordinates w.r.t. +parent segment and absolute coordinates w.r.t. the top-level (source) image. +This includes rotation and offset correction. -* polygon_mask, image_from_polygon, crop_image +* ``polygon_mask``, ``image_from_polygon``, ``crop_image`` These functions combine PIL.Image with polygons or bboxes. -The functions have the syntax X_from_Y, where X/Y can be +The functions have the syntax ``X_from_Y``, where ``X``/``Y`` can be - * `x0y0x1y1` is a 4-list of strings x0, y0, x1, y1 of the bounding box (rectangle) - * `bbox` is a 4-tuple of integers x0, y0, x1, y1 of the bounding box (rectangle) -* points a string encoding a polygon: "0,0 100,0 100,100, 0,100" - * `polygon` is a list of 2-lists of integers x, y of points forming an (implicitly closed) polygon path: [[0,0], [100,0], [100,100], [0,100]] -* xywh a dict with keys for x, y, width and height: {'x': 0, 'y': 0, 'w': 100, 'h': 100} +* ``x0y0x1y1`` is a 4-list of strings ``x0``, ``y0``, ``x1``, ``y1`` of the + bounding box (rectangle) +* ``bbox`` is a 4-tuple of integers x0, y0, x1, y1 of the bounding box (rectangle) +* ``points`` a string encoding a polygon: ``"0,0 100,0 100,100, 0,100"`` +* ``polygon`` is a list of 2-lists of integers x, y of points forming an (implicitly closed) polygon path: ``[[0,0], [100,0], [100,100], [0,100]]`` +* ``xywh`` a dict with keys for x, y, width and height: ``{'x': 0, 'y': 0, 'w': 100, 'h': 100}`` -polygon is what opencv2 and higher-level coordinate functions in ocrd_utils expect +``polygon`` is what opencv2 and higher-level coordinate functions in ocrd_utils expect -xywh and x0y0x1y1 are what tesserocr expects/produces. -points is what PAGE-XML uses. -bbox is what PIL.Image uses. -* is_local_filename, safe_filename, abspath +``xywh`` and ``x0y0x1y1`` are what tesserocr expects/produces. -* is_string, membername, concat_padded +``points`` is what PAGE-XML uses. -* MIMETYPE_PAGE, EXT_TO_MIME, VERSION +``bbox`` is what PIL.Image uses. + +* ``is_local_filename``, ``safe_filename``, ``abspath`` + +FS-related utilities + +* ``is_string``, ``membername``, ``concat_padded`` + +String and OOP utilities + +* ``MIMETYPE_PAGE``, ``EXT_TO_MIME``, ``VERSION`` + +Constants """ __all__ = [ @@ -40,12 +53,9 @@ 'crop_image', 'getLogger', 'is_local_filename', - 'image_from_segment', 'is_string', 'logging', 'membername', - 'image_from_page', - 'image_from_segment', 'image_from_polygon', 'points_from_bbox', 'points_from_polygon', @@ -133,7 +143,7 @@ def xywh_from_polygon(polygon): def coordinates_for_segment(polygon, parent_image, parent_xywh): """Convert a relative coordinates polygon to absolute. - Given a numpy array `polygon` of points, and a parent PIL.Image + Given a numpy array ``polygon`` of points, and a parent PIL.Image along with its bounding box to which the coordinates are relative, calculate the absolute coordinates within the page. That is, (in case the parent was rotated,) rotate all points in @@ -155,7 +165,7 @@ def coordinates_for_segment(polygon, parent_image, parent_xywh): def coordinates_of_segment(segment, parent_image, parent_xywh): """Extract the relative coordinates polygon of a PAGE segment element. - Given a Region / TextLine / Word / Glyph `segment` and + Given a Region / TextLine / Word / Glyph ``segment`` and the PIL.Image of its parent Page / Region / TextLine / Word along with its bounding box, calculate the relative coordinates of the segment within the image. That is, shift all points from @@ -191,12 +201,12 @@ def concat_padded(base, *args): def crop_image(image, box=None): """"Crop an image to a rectangle, filling with background. - Given a PIL.Image `image` and a list `box` of the bounding + Given a PIL.Image ``image`` and a list ``box`` of the bounding rectangle relative to the image, crop at the box coordinates, - filling everything outside `image` with the background. - (This covers the case where `box` indexes are negative or - larger than `image` width/height. PIL.Image.crop would fill - with black.) Since `image` is not necessarily binarized yet, + filling everything outside ``image`` with the background. + (This covers the case where ``box`` indexes are negative or + larger than ``image`` width/height. PIL.Image.crop would fill + with black.) Since ``image`` is not necessarily binarized yet, determine the background from the median color (instead of white). @@ -216,9 +226,9 @@ def crop_image(image, box=None): def image_from_polygon(image, polygon): """"Mask an image with a polygon. - Given a PIL.Image `image` and a numpy array `polygon` + Given a PIL.Image ``image`` and a numpy array ``polygon`` of relative coordinates into the image, put everything - outside the polygon hull to the background. Since `image` + outside the polygon hull to the background. Since ``image`` is not necessarily binarized yet, determine the background from the median color (instead of white). @@ -336,8 +346,8 @@ def polygon_from_xywh(xywh): def polygon_mask(image, coordinates): """"Create a mask image of a polygon. - Given a PIL.Image `image` (merely for dimensions), and - a numpy array `polygon` of relative coordinates into the image, + Given a PIL.Image ``image`` (merely for dimensions), and + a numpy array ``polygon`` of relative coordinates into the image, create a new image of the same size with black background, and fill everything inside the polygon hull with white. @@ -352,10 +362,10 @@ def polygon_mask(image, coordinates): def rotate_coordinates(polygon, angle, orig=np.array([0, 0])): """Apply a passive rotation transformation to the given coordinates. - Given a numpy array `polygon` of points and a rotation `angle`, - as well as a numpy array `orig` of the center of rotation, + Given a numpy array ``polygon`` of points and a rotation ``angle``, + as well as a numpy array ``orig`` of the center of rotation, calculate the coordinate transform corresponding to the rotation - of the underlying image by `angle` degrees at `center` by + of the underlying image by ``angle`` degrees at ``center`` by applying translation to the center, inverse rotation, and translation from the center. From 677890b8140c43d89414314e202236e565ba210f Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 20 Aug 2019 19:18:12 +0200 Subject: [PATCH 12/16] :memo: format utils pydoc --- ocrd_utils/ocrd_utils/__init__.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 463948e069..4f7fe5cd5b 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -1,8 +1,7 @@ """ Utility methods usable in various circumstances. -* ``xywh_from_points``, ``points_from_xywh``, ``polygon_from_points`` -* ``coordinates_of_segment``, ``coordinates_for_segment``, ``rotate_coordinates`` +* ``coordinates_of_segment``, ``coordinates_for_segment``, ``rotate_coordinates``, ``xywh_from_points``, ``points_from_xywh``, ``polygon_from_points`` These functions convert polygon outlines for PAGE elements on all hierarchy levels (page, region, line, word, glyph) between relative coordinates w.r.t. @@ -14,12 +13,12 @@ These functions combine PIL.Image with polygons or bboxes. The functions have the syntax ``X_from_Y``, where ``X``/``Y`` can be -* ``x0y0x1y1`` is a 4-list of strings ``x0``, ``y0``, ``x1``, ``y1`` of the - bounding box (rectangle) -* ``bbox`` is a 4-tuple of integers x0, y0, x1, y1 of the bounding box (rectangle) -* ``points`` a string encoding a polygon: ``"0,0 100,0 100,100, 0,100"`` -* ``polygon`` is a list of 2-lists of integers x, y of points forming an (implicitly closed) polygon path: ``[[0,0], [100,0], [100,100], [0,100]]`` -* ``xywh`` a dict with keys for x, y, width and height: ``{'x': 0, 'y': 0, 'w': 100, 'h': 100}`` + * ``bbox`` is a 4-tuple of integers x0, y0, x1, y1 of the bounding box (rectangle) + * ``points`` a string encoding a polygon: ``"0,0 100,0 100,100, 0,100"`` + * ``polygon`` is a list of 2-lists of integers x, y of points forming an (implicitly closed) polygon path: ``[[0,0], [100,0], [100,100], [0,100]]`` + * ``xywh`` a dict with keys for x, y, width and height: ``{'x': 0, 'y': 0, 'w': 100, 'h': 100}`` + * ``x0y0x1y1`` is a 4-list of strings ``x0``, ``y0``, ``x1``, ``y1`` of the bounding box (rectangle) + * ``y0x0y1x1`` is the same as ``x0y0x1y1`` with positions of ``x`` and ``y`` in the list swapped ``polygon`` is what opencv2 and higher-level coordinate functions in ocrd_utils expect @@ -289,7 +288,7 @@ def points_from_xywh(box): def points_from_y0x0y1x1(yxyx): """ - Constructs a polygon representation from a rectangle described as a list [y0, x0, y1, x1] + Construct a polygon representation from a rectangle described as a list [y0, x0, y1, x1] """ y0 = yxyx[0] x0 = yxyx[1] @@ -304,7 +303,7 @@ def points_from_y0x0y1x1(yxyx): def points_from_x0y0x1y1(xyxy): """ - Constructs a polygon representation from a rectangle described as a list [x0, y0, x1, y1] + Construct a polygon representation from a rectangle described as a list [x0, y0, x1, y1] """ x0 = xyxy[0] y0 = xyxy[1] @@ -371,7 +370,7 @@ def rotate_coordinates(polygon, angle, orig=np.array([0, 0])): Return a numpy array of the resulting polygon. """ - angle = np.deg2rad(angle) + angle = np.deg2rad(angle) # pylint: disable=assignment-from-no-return cos = np.cos(angle) sin = np.sin(angle) # active rotation: [[cos, -sin], [sin, cos]] From ef214761fe1b598a33f3720f0a5ca80ff0fce12c Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 20 Aug 2019 19:51:30 +0200 Subject: [PATCH 13/16] :memo: changelog --- CHANGELOG.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c95fa3eacc..24344dad55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,25 @@ Versioned according to [Semantic Versioning](http://semver.org/). Added: - * many utility methods for image manipulation and coordinate handling, #268 + * many utility methods for image manipulation and coordinate handling, #268, OCR-D/ocrd_tesserocr#49 + * `bbox_from_points` + * `bbox_from_xywh` + * `bbox_from_polygon` + * `coordinates_for_segment` + * `coordinates_of_segment` + * `crop_image` + * `membername` + * `image_from_polygon` + * `points_from_bbox` + * `points_from_polygon` + * `points_from_xywh` + * `polygon_from_bbox` + * `polygon_from_x0y0x1y1` + * `polygon_from_xywh` + * `polygon_mask` + * `rotate_coordinates` + * `xywh_from_bbox` + * Spec-conformant handling of AlternativeImage, OCR-D/spec#116, OCR-D/ocrd_tesserocr#33, #284 Changed: From f1772ce2faf20d10b5fbc70448a409c418327a9d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 20 Aug 2019 20:00:03 +0200 Subject: [PATCH 14/16] :art: in Workspace docstrings: S/crop/cut/ --- ocrd/ocrd/workspace.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 6518c6f0c0..748590e5ed 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -247,12 +247,12 @@ def image_from_page(self, page, page_id): Given a PageType object, `page`, extract its PIL.Image from AlternativeImage if it exists. Otherwise extract the PIL.Image - from imageFilename and crop it if a Border exists. Otherwise + from imageFilename and cut it if a Border exists. Otherwise just return it. - When cropping, respect any orientation angle annotated for + When cuting, respect any orientation angle annotated for the page (from page-level deskewing) by rotating the - cropped image, respectively. + cut image, respectively. If the resulting page image is larger than the bounding box of `page`, pass down the page's box coordinates with an offset of @@ -281,8 +281,8 @@ def image_from_page(self, page, page_id): alternative_image = page.get_AlternativeImage() if alternative_image: - # (e.g. from page-level cropping, binarization, deskewing or despeckling) - # assumes implicit cropping (i.e. page_xywh has been applied already) + # (e.g. from page-level cutting, binarization, deskewing or despeckling) + # assumes implicit cutting (i.e. page_xywh has been applied already) log.debug("Using AlternativeImage %d (%s) for page '%s'", len(alternative_image), alternative_image[-1].get_comments(), page_id) @@ -293,7 +293,7 @@ def image_from_page(self, page, page_id): page_polygon = np.array(polygon_from_points(page_points)) # create a mask from the page polygon: page_image = image_from_polygon(page_image, page_polygon) - # recrop into page rectangle: + # crop into page rectangle: page_image = crop_image(page_image, box=(page_xywh['x'], page_xywh['y'], @@ -318,21 +318,21 @@ def image_from_segment(self, segment, parent_image, parent_xywh): its absolute coordinates, `parent_xywh`, and a PAGE segment (TextRegion / TextLine / Word / Glyph) object logically contained in it, `segment`, extract its PIL.Image - from AlternativeImage (if it exists), or via cropping from + from AlternativeImage (if it exists), or via cutting from `parent_image`. - When cropping, respect any orientation angle annotated for + When cutting, respect any orientation angle annotated for the parent (from parent-level deskewing) by compensating the segment coordinates in an inverse transformation (translation to center, rotation, re-translation). Also, mind the difference between annotated and actual size of the parent (usually from deskewing), by a respective offset - into the image. Cropping uses a polygon mask (not just the + into the image. Cutting uses a polygon mask (not just the rectangle). - When cropping, respect any orientation angle annotated for + When cutting, respect any orientation angle annotated for the segment (from segment-level deskewing) by rotating the - cropped image, respectively. + cutted image, respectively. If the resulting segment image is larger than the bounding box of `segment`, pass down the segment's box coordinates with an offset @@ -348,7 +348,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh): segment_xywh['angle'] = -(segment.get_orientation() or 0) alternative_image = segment.get_AlternativeImage() if alternative_image: - # (e.g. from segment-level cropping, binarization, deskewing or despeckling) + # (e.g. from segment-level cutting, binarization, deskewing or despeckling) log.debug("Using AlternativeImage %d (%s) for segment '%s'", len(alternative_image), alternative_image[-1].get_comments(), segment.id) @@ -359,7 +359,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh): segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh) # create a mask from the segment polygon: segment_image = image_from_polygon(parent_image, segment_polygon) - # recrop into segment rectangle: + # crop into segment rectangle: segment_image = crop_image(segment_image, box=(segment_xywh['x'] - parent_xywh['x'], segment_xywh['y'] - parent_xywh['y'], From 50f76213eb0311c90de56e24ac898d64e0574d9a Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 20 Aug 2019 20:08:13 +0200 Subject: [PATCH 15/16] :fire: deprecate resolve_image_as_pil in favor of image_from_page and image_from_segment --- ocrd/ocrd/workspace.py | 11 ++++++++--- ocrd/requirements.txt | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 748590e5ed..4e992abd1b 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -6,6 +6,7 @@ from PIL import Image import numpy as np from atomicwrites import atomic_write +from deprecated.sphinx import deprecated from ocrd_models import OcrdMets, OcrdExif from ocrd_utils import ( @@ -205,7 +206,11 @@ def resolve_image_exif(self, image_url): self.image_cache['exif'][image_url] = OcrdExif(Image.open(image_filename)) return self.image_cache['exif'][image_url] + @deprecated(version='1.0.0', reason="Use workspace.image_from_page and workspace.image_from_segment") def resolve_image_as_pil(self, image_url, coords=None): + return self._resolve_image_as_pil(image_url, coords) + + def _resolve_image_as_pil(self, image_url, coords=None): """ Resolve an image URL to a PIL image. @@ -262,7 +267,7 @@ def image_from_page(self, page, page_id): the page's bounding box / border (for passing down), and an OcrdExif instance associated with the original image. """ - page_image = self.resolve_image_as_pil(page.imageFilename) + page_image = self._resolve_image_as_pil(page.imageFilename) page_image_info = OcrdExif(page_image) page_xywh = {'x': 0, 'y': 0, @@ -286,7 +291,7 @@ def image_from_page(self, page, page_id): log.debug("Using AlternativeImage %d (%s) for page '%s'", len(alternative_image), alternative_image[-1].get_comments(), page_id) - page_image = self.resolve_image_as_pil( + page_image = self._resolve_image_as_pil( alternative_image[-1].get_filename()) elif border: # get polygon outline of page border: @@ -352,7 +357,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh): log.debug("Using AlternativeImage %d (%s) for segment '%s'", len(alternative_image), alternative_image[-1].get_comments(), segment.id) - segment_image = self.resolve_image_as_pil( + segment_image = self._resolve_image_as_pil( alternative_image[-1].get_filename()) else: # get polygon outline of segment relative to parent image: diff --git a/ocrd/requirements.txt b/ocrd/requirements.txt index e2998f3f3b..a31a6ba545 100644 --- a/ocrd/requirements.txt +++ b/ocrd/requirements.txt @@ -9,3 +9,4 @@ Flask jsonschema pyyaml atomicwrites >= 1.3.0 +Deprecated == 1.2.0 From 5332936dca2e5537413dd403abae95687041fdce Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 21 Aug 2019 11:06:50 +0200 Subject: [PATCH 16/16] Revert ":art: in Workspace docstrings: S/crop/cut/" This reverts commit f1772ce2faf20d10b5fbc70448a409c418327a9d. --- ocrd/ocrd/workspace.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 4e992abd1b..6260393d64 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -252,12 +252,12 @@ def image_from_page(self, page, page_id): Given a PageType object, `page`, extract its PIL.Image from AlternativeImage if it exists. Otherwise extract the PIL.Image - from imageFilename and cut it if a Border exists. Otherwise + from imageFilename and crop it if a Border exists. Otherwise just return it. - When cuting, respect any orientation angle annotated for + When cropping, respect any orientation angle annotated for the page (from page-level deskewing) by rotating the - cut image, respectively. + cropped image, respectively. If the resulting page image is larger than the bounding box of `page`, pass down the page's box coordinates with an offset of @@ -286,8 +286,8 @@ def image_from_page(self, page, page_id): alternative_image = page.get_AlternativeImage() if alternative_image: - # (e.g. from page-level cutting, binarization, deskewing or despeckling) - # assumes implicit cutting (i.e. page_xywh has been applied already) + # (e.g. from page-level cropping, binarization, deskewing or despeckling) + # assumes implicit cropping (i.e. page_xywh has been applied already) log.debug("Using AlternativeImage %d (%s) for page '%s'", len(alternative_image), alternative_image[-1].get_comments(), page_id) @@ -298,7 +298,7 @@ def image_from_page(self, page, page_id): page_polygon = np.array(polygon_from_points(page_points)) # create a mask from the page polygon: page_image = image_from_polygon(page_image, page_polygon) - # crop into page rectangle: + # recrop into page rectangle: page_image = crop_image(page_image, box=(page_xywh['x'], page_xywh['y'], @@ -323,21 +323,21 @@ def image_from_segment(self, segment, parent_image, parent_xywh): its absolute coordinates, `parent_xywh`, and a PAGE segment (TextRegion / TextLine / Word / Glyph) object logically contained in it, `segment`, extract its PIL.Image - from AlternativeImage (if it exists), or via cutting from + from AlternativeImage (if it exists), or via cropping from `parent_image`. - When cutting, respect any orientation angle annotated for + When cropping, respect any orientation angle annotated for the parent (from parent-level deskewing) by compensating the segment coordinates in an inverse transformation (translation to center, rotation, re-translation). Also, mind the difference between annotated and actual size of the parent (usually from deskewing), by a respective offset - into the image. Cutting uses a polygon mask (not just the + into the image. Cropping uses a polygon mask (not just the rectangle). - When cutting, respect any orientation angle annotated for + When cropping, respect any orientation angle annotated for the segment (from segment-level deskewing) by rotating the - cutted image, respectively. + cropped image, respectively. If the resulting segment image is larger than the bounding box of `segment`, pass down the segment's box coordinates with an offset @@ -353,7 +353,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh): segment_xywh['angle'] = -(segment.get_orientation() or 0) alternative_image = segment.get_AlternativeImage() if alternative_image: - # (e.g. from segment-level cutting, binarization, deskewing or despeckling) + # (e.g. from segment-level cropping, binarization, deskewing or despeckling) log.debug("Using AlternativeImage %d (%s) for segment '%s'", len(alternative_image), alternative_image[-1].get_comments(), segment.id) @@ -364,7 +364,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh): segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh) # create a mask from the segment polygon: segment_image = image_from_polygon(parent_image, segment_polygon) - # crop into segment rectangle: + # recrop into segment rectangle: segment_image = crop_image(segment_image, box=(segment_xywh['x'] - parent_xywh['x'], segment_xywh['y'] - parent_xywh['y'],