diff --git a/CHANGELOG.md b/CHANGELOG.md index 5620ae25e8..24344dad55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,28 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +Added: + + * many utility methods for image manipulation and coordinate handling, #268, OCR-D/ocrd_tesserocr#49 + * `bbox_from_points` + * `bbox_from_xywh` + * `bbox_from_polygon` + * `coordinates_for_segment` + * `coordinates_of_segment` + * `crop_image` + * `membername` + * `image_from_polygon` + * `points_from_bbox` + * `points_from_polygon` + * `points_from_xywh` + * `polygon_from_bbox` + * `polygon_from_x0y0x1y1` + * `polygon_from_xywh` + * `polygon_mask` + * `rotate_coordinates` + * `xywh_from_bbox` + * Spec-conformant handling of AlternativeImage, OCR-D/spec#116, OCR-D/ocrd_tesserocr#33, #284 + Changed: * workspace bagger will create files with extension @@ -526,9 +548,12 @@ Fixed Initial Release +<<<<<<< HEAD +======= [1.0.0b15]: ../../compare/v1.0.0b15...v1.0.0b14 [1.0.0b14]: ../../compare/v1.0.0b14...v1.0.0b13 [1.0.0b13]: ../../compare/v1.0.0b13...v1.0.0b12 +>>>>>>> master [1.0.0b12]: ../../compare/v1.0.0b12...v1.0.0b11 [1.0.0b11]: ../../compare/v1.0.0b11...v1.0.0b10 [1.0.0b10]: ../../compare/v1.0.0b10...v1.0.0b9 diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index da818fc53d..6260393d64 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -1,4 +1,4 @@ -# import os +import io from os import makedirs, chdir, getcwd, unlink from os.path import join as pjoin, isdir @@ -6,14 +6,26 @@ from PIL import Image import numpy as np from atomicwrites import atomic_write +from deprecated.sphinx import deprecated from ocrd_models import OcrdMets, OcrdExif -from ocrd_utils import getLogger, is_local_filename, abspath, pushd_popd +from ocrd_utils import ( + abspath, + coordinates_of_segment, + crop_image, + getLogger, + image_from_polygon, + is_local_filename, + polygon_from_points, + xywh_from_points, + pushd_popd, +) from .workspace_backup import WorkspaceBackupManager log = getLogger('ocrd.workspace') + class Workspace(): """ A workspace is a temporary directory set up for a processor. It's the @@ -194,7 +206,11 @@ def resolve_image_exif(self, image_url): self.image_cache['exif'][image_url] = OcrdExif(Image.open(image_filename)) return self.image_cache['exif'][image_url] + @deprecated(version='1.0.0', reason="Use workspace.image_from_page and workspace.image_from_segment") def resolve_image_as_pil(self, image_url, coords=None): + return self._resolve_image_as_pil(image_url, coords) + + def _resolve_image_as_pil(self, image_url, coords=None): """ Resolve an image URL to a PIL image. @@ -230,3 +246,183 @@ def resolve_image_as_pil(self, image_url, coords=None): np.min(poly[:, 0]):np.max(poly[:, 0]) ] return Image.fromarray(region_cut) + + def image_from_page(self, page, page_id): + """Extract the Page image from the workspace. + + Given a PageType object, `page`, extract its PIL.Image from + AlternativeImage if it exists. Otherwise extract the PIL.Image + from imageFilename and crop it if a Border exists. Otherwise + just return it. + + When cropping, respect any orientation angle annotated for + the page (from page-level deskewing) by rotating the + cropped image, respectively. + + If the resulting page image is larger than the bounding box of + `page`, pass down the page's box coordinates with an offset of + half the width/height difference. + + Return the extracted image, and the absolute coordinates of + the page's bounding box / border (for passing down), and + an OcrdExif instance associated with the original image. + """ + page_image = self._resolve_image_as_pil(page.imageFilename) + page_image_info = OcrdExif(page_image) + page_xywh = {'x': 0, + 'y': 0, + 'w': page_image.width, + 'h': page_image.height} + # region angle: PAGE orientation is defined clockwise, + # whereas PIL/ndimage rotation is in mathematical direction: + page_xywh['angle'] = -(page.get_orientation() or 0) + # FIXME: remove PrintSpace here as soon as GT abides by the PAGE standard: + border = page.get_Border() or page.get_PrintSpace() + if border: + page_points = border.get_Coords().points + log.debug("Using explictly set page border '%s' for page '%s'", + page_points, page_id) + page_xywh = xywh_from_points(page_points) + + alternative_image = page.get_AlternativeImage() + if alternative_image: + # (e.g. from page-level cropping, binarization, deskewing or despeckling) + # assumes implicit cropping (i.e. page_xywh has been applied already) + log.debug("Using AlternativeImage %d (%s) for page '%s'", + len(alternative_image), alternative_image[-1].get_comments(), + page_id) + page_image = self._resolve_image_as_pil( + alternative_image[-1].get_filename()) + elif border: + # get polygon outline of page border: + page_polygon = np.array(polygon_from_points(page_points)) + # create a mask from the page polygon: + page_image = image_from_polygon(page_image, page_polygon) + # recrop into page rectangle: + page_image = crop_image(page_image, + box=(page_xywh['x'], + page_xywh['y'], + page_xywh['x'] + page_xywh['w'], + page_xywh['y'] + page_xywh['h'])) + if 'angle' in page_xywh and page_xywh['angle']: + log.info("About to rotate page '%s' by %.2f°", + page_id, page_xywh['angle']) + page_image = page_image.rotate(page_xywh['angle'], + expand=True, + #resample=Image.BILINEAR, + fillcolor='white') + # subtract offset from any increase in binary region size over source: + page_xywh['x'] -= round(0.5 * max(0, page_image.width - page_xywh['w'])) + page_xywh['y'] -= round(0.5 * max(0, page_image.height - page_xywh['h'])) + return page_image, page_xywh, page_image_info + + def image_from_segment(self, segment, parent_image, parent_xywh): + """Extract a segment image from its parent's image. + + Given a PIL.Image of the parent, `parent_image`, and + its absolute coordinates, `parent_xywh`, and a PAGE + segment (TextRegion / TextLine / Word / Glyph) object + logically contained in it, `segment`, extract its PIL.Image + from AlternativeImage (if it exists), or via cropping from + `parent_image`. + + When cropping, respect any orientation angle annotated for + the parent (from parent-level deskewing) by compensating the + segment coordinates in an inverse transformation (translation + to center, rotation, re-translation). + Also, mind the difference between annotated and actual size + of the parent (usually from deskewing), by a respective offset + into the image. Cropping uses a polygon mask (not just the + rectangle). + + When cropping, respect any orientation angle annotated for + the segment (from segment-level deskewing) by rotating the + cropped image, respectively. + + If the resulting segment image is larger than the bounding box of + `segment`, pass down the segment's box coordinates with an offset + of half the width/height difference. + + Return the extracted image, and the absolute coordinates of + the segment's bounding box (for passing down). + """ + segment_xywh = xywh_from_points(segment.get_Coords().points) + if 'orientation' in segment.__dict__: + # angle: PAGE orientation is defined clockwise, + # whereas PIL/ndimage rotation is in mathematical direction: + segment_xywh['angle'] = -(segment.get_orientation() or 0) + alternative_image = segment.get_AlternativeImage() + if alternative_image: + # (e.g. from segment-level cropping, binarization, deskewing or despeckling) + log.debug("Using AlternativeImage %d (%s) for segment '%s'", + len(alternative_image), alternative_image[-1].get_comments(), + segment.id) + segment_image = self._resolve_image_as_pil( + alternative_image[-1].get_filename()) + else: + # get polygon outline of segment relative to parent image: + segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh) + # create a mask from the segment polygon: + segment_image = image_from_polygon(parent_image, segment_polygon) + # recrop into segment rectangle: + segment_image = crop_image(segment_image, + box=(segment_xywh['x'] - parent_xywh['x'], + segment_xywh['y'] - parent_xywh['y'], + segment_xywh['x'] - parent_xywh['x'] + segment_xywh['w'], + segment_xywh['y'] - parent_xywh['y'] + segment_xywh['h'])) + # note: We should mask overlapping neighbouring segments here, + # but finding the right clipping rules can be difficult if operating + # on the raw (non-binary) image data alone: for each intersection, it + # must be decided which one of either segment or neighbour to assign, + # e.g. an ImageRegion which properly contains our TextRegion should be + # completely ignored, but an ImageRegion which is properly contained + # in our TextRegion should be completely masked, while partial overlap + # may be more difficult to decide. On the other hand, on the binary image, + # we can use connected component analysis to mask foreground areas which + # originate in the neighbouring regions. But that would introduce either + # the assumption that the input has already been binarized, or a dependency + # on some ad-hoc binarization method. Thus, it is preferable to use + # a dedicated processor for this (which produces clipped AlternativeImage + # or reduced polygon coordinates). + if 'angle' in segment_xywh and segment_xywh['angle']: + log.info("About to rotate segment '%s' by %.2f°", + segment.id, segment_xywh['angle']) + segment_image = segment_image.rotate(segment_xywh['angle'], + expand=True, + #resample=Image.BILINEAR, + fillcolor='white') + # subtract offset from any increase in binary region size over source: + segment_xywh['x'] -= round(0.5 * max(0, segment_image.width - segment_xywh['w'])) + segment_xywh['y'] -= round(0.5 * max(0, segment_image.height - segment_xywh['h'])) + return segment_image, segment_xywh + + # pylint: disable=redefined-builtin + def save_image_file(self, image, + file_id, + page_id=None, + file_grp='OCR-D-IMG', # or -BIN? + format='PNG', + force=True): + """Store and reference an image as file into the workspace. + + Given a PIL.Image `image`, and an ID `file_id` to use in METS, + store the image under the fileGrp `file_grp` and physical page + `page_id` into the workspace (in a file name based on + the `file_grp`, `file_id` and `format` extension). + + Return the (absolute) path of the created file. + """ + image_bytes = io.BytesIO() + image.save(image_bytes, format=format) + file_path = pjoin(file_grp, file_id + '.' + format.lower()) + out = self.add_file( + ID=file_id, + file_grp=file_grp, + pageId=page_id, + local_filename=file_path, + mimetype='image/' + format.lower(), + content=image_bytes.getvalue(), + force=force) + log.info('created file ID: %s, file_grp: %s, path: %s', + file_id, file_grp, out.local_filename) + return file_path diff --git a/ocrd/requirements.txt b/ocrd/requirements.txt index 6a2f8eb132..a31a6ba545 100644 --- a/ocrd/requirements.txt +++ b/ocrd/requirements.txt @@ -3,10 +3,10 @@ bagit_profile >= 1.3.0 click >=7 requests lxml -Pillow == 5.4.1 -numpy +Pillow >= 5.3.0 opencv-python-headless Flask jsonschema pyyaml atomicwrites >= 1.3.0 +Deprecated == 1.2.0 diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index bea8849664..d3d8d4257f 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -1,33 +1,75 @@ """ Utility methods usable in various circumstances. -* xywh_from_points, points_from_xywh, polygon_from_points +* ``coordinates_of_segment``, ``coordinates_for_segment``, ``rotate_coordinates``, ``xywh_from_points``, ``points_from_xywh``, ``polygon_from_points`` -The functions have the syntax X_from_Y, where X/Y can be +These functions convert polygon outlines for PAGE elements on all hierarchy +levels (page, region, line, word, glyph) between relative coordinates w.r.t. +parent segment and absolute coordinates w.r.t. the top-level (source) image. +This includes rotation and offset correction. -points a string encoding a polygon: "0,0 100,0 100,100, 0,100" -polygon an array of x-y-tuples of a polygon: [[0,0], [100,0], [100,100], [0,100]] -xywh a dict with keys for x, y, width and height: {'x': 0, 'y': 0, 'w': 100, 'h': 100} -points is for PAGE +* ``polygon_mask``, ``image_from_polygon``, ``crop_image`` -polygon is what opencv2 expects +These functions combine PIL.Image with polygons or bboxes. +The functions have the syntax ``X_from_Y``, where ``X``/``Y`` can be -xywh is what tesserocr expects/produces. + * ``bbox`` is a 4-tuple of integers x0, y0, x1, y1 of the bounding box (rectangle) + * ``points`` a string encoding a polygon: ``"0,0 100,0 100,100, 0,100"`` + * ``polygon`` is a list of 2-lists of integers x, y of points forming an (implicitly closed) polygon path: ``[[0,0], [100,0], [100,100], [0,100]]`` + * ``xywh`` a dict with keys for x, y, width and height: ``{'x': 0, 'y': 0, 'w': 100, 'h': 100}`` + * ``x0y0x1y1`` is a 4-list of strings ``x0``, ``y0``, ``x1``, ``y1`` of the bounding box (rectangle) + * ``y0x0y1x1`` is the same as ``x0y0x1y1`` with positions of ``x`` and ``y`` in the list swapped + +``polygon`` is what opencv2 and higher-level coordinate functions in ocrd_utils expect + +``xywh`` and ``x0y0x1y1`` are what tesserocr expects/produces. + +``points`` is what PAGE-XML uses. + +``bbox`` is what PIL.Image uses. + +* ``is_local_filename``, ``safe_filename``, ``abspath`` + +FS-related utilities + +* ``is_string``, ``membername``, ``concat_padded`` + +String and OOP utilities + +* ``MIMETYPE_PAGE``, ``EXT_TO_MIME``, ``VERSION`` + +Constants """ __all__ = [ 'abspath', + 'bbox_from_points', + 'bbox_from_xywh', + 'bbox_from_polygon', + 'coordinates_for_segment', + 'coordinates_of_segment', 'concat_padded', + 'crop_image', 'getLogger', 'is_local_filename', 'is_string', 'logging', - 'points_from_xywh', + 'membername', + 'image_from_polygon', + 'points_from_bbox', + 'points_from_polygon', 'points_from_x0y0x1y1', + 'points_from_xywh', 'points_from_y0x0y1x1', + 'polygon_from_bbox', 'polygon_from_points', + 'polygon_from_x0y0x1y1', + 'polygon_from_xywh', + 'polygon_mask', + 'rotate_coordinates', 'safe_filename', 'unzip_file_to_dir', + 'xywh_from_bbox', 'xywh_from_points', 'VERSION', @@ -35,17 +77,26 @@ 'EXT_TO_MIME', ] +import io import re import sys import logging +import os from os import getcwd, chdir from os.path import isfile, abspath as os_abspath from zipfile import ZipFile import contextlib +import numpy as np +from PIL import Image, ImageStat, ImageDraw + +import logging from .logging import getLogger from .constants import * # pylint: disable=wildcard-import +LOG = getLogger('ocrd_utils') + + def abspath(url): """ Get a full path to a file or file URL @@ -56,6 +107,87 @@ def abspath(url): url = url[len('file://'):] return os_abspath(url) +def bbox_from_points(points): + """Construct a numeric list representing a bounding box from polygon coordinates in page representation.""" + xys = [[int(p) for p in pair.split(',')] for pair in points.split(' ')] + return bbox_from_polygon(xys) + +def bbox_from_polygon(polygon): + """Construct a numeric list representing a bounding box from polygon coordinates in numeric list representation.""" + minx = sys.maxsize + miny = sys.maxsize + maxx = 0 + maxy = 0 + for xy in polygon: + if xy[0] < minx: + minx = xy[0] + if xy[0] > maxx: + maxx = xy[0] + if xy[1] < miny: + miny = xy[1] + if xy[1] > maxy: + maxy = xy[1] + return minx, miny, maxx, maxy + +def bbox_from_xywh(xywh): + """Convert a bounding box from a numeric dict to a numeric list representation.""" + return ( + xywh['x'], + xywh['y'], + xywh['x'] + xywh['w'], + xywh['y'] + xywh['h'] + ) + +def xywh_from_polygon(polygon): + """Construct a numeric dict representing a bounding box from polygon coordinates in numeric list representation.""" + return xywh_from_bbox(*bbox_from_polygon(polygon)) + +def coordinates_for_segment(polygon, parent_image, parent_xywh): + """Convert a relative coordinates polygon to absolute. + + Given a numpy array ``polygon`` of points, and a parent PIL.Image + along with its bounding box to which the coordinates are relative, + calculate the absolute coordinates within the page. + That is, (in case the parent was rotated,) rotate all points in + opposite direction with the center of the image as origin, then + shift all points to the offset of the parent. + + Return the rounded numpy array of the resulting polygon. + """ + # angle correction (unrotate coordinates if image has been rotated): + if 'angle' in parent_xywh: + polygon = rotate_coordinates( + polygon, -parent_xywh['angle'], + orig=np.array([0.5 * parent_image.width, + 0.5 * parent_image.height])) + # offset correction (shift coordinates from base of segment): + polygon += np.array([parent_xywh['x'], parent_xywh['y']]) + return np.round(polygon).astype(np.int32) + +def coordinates_of_segment(segment, parent_image, parent_xywh): + """Extract the relative coordinates polygon of a PAGE segment element. + + Given a Region / TextLine / Word / Glyph ``segment`` and + the PIL.Image of its parent Page / Region / TextLine / Word + along with its bounding box, calculate the relative coordinates + of the segment within the image. That is, shift all points from + the offset of the parent, and (in case the parent was rotated,) + rotate all points with the center of the image as origin. + + Return the rounded numpy array of the resulting polygon. + """ + # get polygon: + polygon = np.array(polygon_from_points(segment.get_Coords().points)) + # offset correction (shift coordinates to base of segment): + polygon -= np.array([parent_xywh['x'], parent_xywh['y']]) + # angle correction (rotate coordinates if image has been rotated): + if 'angle' in parent_xywh: + polygon = rotate_coordinates( + polygon, parent_xywh['angle'], + orig=np.array([0.5 * parent_image.width, + 0.5 * parent_image.height])) + return np.round(polygon).astype(np.int32) + @contextlib.contextmanager def pushd_popd(newcwd=None): oldcwd = getcwd() @@ -78,6 +210,53 @@ def concat_padded(base, *args): ret = "%s_%04i" % (ret, n + 1) return ret +def crop_image(image, box=None): + """"Crop an image to a rectangle, filling with background. + + Given a PIL.Image ``image`` and a list ``box`` of the bounding + rectangle relative to the image, crop at the box coordinates, + filling everything outside ``image`` with the background. + (This covers the case where ``box`` indexes are negative or + larger than ``image`` width/height. PIL.Image.crop would fill + with black.) Since ``image`` is not necessarily binarized yet, + determine the background from the median color (instead of + white). + + Return a new PIL.Image. + """ + # todo: perhaps we should issue a warning if we encounter this + # (It should be invalid in PAGE-XML to extend beyond parents.) + if not box: + box = (0, 0, image.width, image.height) + xywh = xywh_from_bbox(*box) + background = ImageStat.Stat(image).median[0] + new_image = Image.new(image.mode, (xywh['w'], xywh['h']), + background) # or 'white' + new_image.paste(image, (-xywh['x'], -xywh['y'])) + return new_image + +def image_from_polygon(image, polygon): + """"Mask an image with a polygon. + + Given a PIL.Image ``image`` and a numpy array ``polygon`` + of relative coordinates into the image, put everything + outside the polygon hull to the background. Since ``image`` + is not necessarily binarized yet, determine the background + from the median color (instead of white). + + Return a new PIL.Image. + """ + mask = polygon_mask(image, polygon) + # create a background image from its median color + # (in case it has not been binarized yet): + # array = np.asarray(image) + # background = np.median(array, axis=[0, 1], keepdims=True) + # array = np.broadcast_to(background.astype(np.uint8), array.shape) + background = ImageStat.Stat(image).median[0] + new_image = Image.new('L', image.size, background) + new_image.paste(image, mask=mask) + return new_image + def is_local_filename(url): """ Whether a url is a local filename. @@ -94,9 +273,22 @@ def is_string(val): """ return isinstance(val, str) +def membername(class_, val): + """Convert a member variable/constant into a member name string.""" + return next((k for k, v in class_.__dict__.items() if v == val), str(val)) + +def points_from_bbox(minx, miny, maxx, maxy): + """Construct polygon coordinates in page representation from a numeric list representing a bounding box.""" + return "%i,%i %i,%i %i,%i %i,%i" % ( + minx, miny, maxx, miny, maxx, maxy, minx, maxy) + +def points_from_polygon(polygon): + """Convert polygon coordinates from a numeric list representation to a page representation.""" + return " ".join("%i,%i" % (x, y) for x, y in polygon) + def points_from_xywh(box): """ - Constructs a polygon representation from a rectangle described as a dict with keys x, y, w, h. + Construct polygon coordinates in page representation from numeric dict representing a bounding box. """ x, y, w, h = box['x'], box['y'], box['w'], box['h'] # tesseract uses a different region representation format @@ -109,7 +301,7 @@ def points_from_xywh(box): def points_from_y0x0y1x1(yxyx): """ - Constructs a polygon representation from a rectangle described as a list [y0, x0, y1, x1] + Construct a polygon representation from a rectangle described as a list [y0, x0, y1, x1] """ y0 = yxyx[0] x0 = yxyx[1] @@ -124,7 +316,7 @@ def points_from_y0x0y1x1(yxyx): def points_from_x0y0x1y1(xyxy): """ - Constructs a polygon representation from a rectangle described as a list [x0, y0, x1, y1] + Construct a polygon representation from a rectangle described as a list [x0, y0, x1, y1] """ x0 = xyxy[0] y0 = xyxy[1] @@ -137,9 +329,13 @@ def points_from_x0y0x1y1(xyxy): x0, y1 ) +def polygon_from_bbox(minx, miny, maxx, maxy): + """Construct polygon coordinates in numeric list representation from a numeric list representing a bounding box.""" + return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]] + def polygon_from_points(points): """ - Constructs a numpy-compatible polygon from a page representation. + Convert polygon coordinates in page representation to polygon coordinates in numeric list representation. """ polygon = [] for pair in points.split(" "): @@ -147,6 +343,53 @@ def polygon_from_points(points): polygon.append([float(x_y[0]), float(x_y[1])]) return polygon +def polygon_from_x0y0x1y1(x0y0x1y1): + """Construct polygon coordinates in numeric list representation from a string list representing a bounding box.""" + minx = int(x0y0x1y1[0]) + miny = int(x0y0x1y1[1]) + maxx = int(x0y0x1y1[2]) + maxy = int(x0y0x1y1[3]) + return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]] + +def polygon_from_xywh(xywh): + """Construct polygon coordinates in numeric list representation from numeric dict representing a bounding box.""" + return polygon_from_bbox(*bbox_from_xywh(xywh)) + +def polygon_mask(image, coordinates): + """"Create a mask image of a polygon. + + Given a PIL.Image ``image`` (merely for dimensions), and + a numpy array ``polygon`` of relative coordinates into the image, + create a new image of the same size with black background, and + fill everything inside the polygon hull with white. + + Return the new PIL.Image. + """ + mask = Image.new('L', image.size, 0) + if isinstance(coordinates, np.ndarray): + coordinates = list(map(tuple, coordinates)) + ImageDraw.Draw(mask).polygon(coordinates, outline=1, fill=255) + return mask + +def rotate_coordinates(polygon, angle, orig=np.array([0, 0])): + """Apply a passive rotation transformation to the given coordinates. + + Given a numpy array ``polygon`` of points and a rotation ``angle``, + as well as a numpy array ``orig`` of the center of rotation, + calculate the coordinate transform corresponding to the rotation + of the underlying image by ``angle`` degrees at ``center`` by + applying translation to the center, inverse rotation, + and translation from the center. + + Return a numpy array of the resulting polygon. + """ + angle = np.deg2rad(angle) # pylint: disable=assignment-from-no-return + cos = np.cos(angle) + sin = np.sin(angle) + # active rotation: [[cos, -sin], [sin, cos]] + # passive rotation: [[cos, sin], [-sin, cos]] (inverse) + return orig + np.dot(polygon - orig, np.array([[cos, sin], [-sin, cos]]).transpose()) + def safe_filename(url): """ Sanitize input to be safely used as the basename of a local file. @@ -163,9 +406,18 @@ def unzip_file_to_dir(path_to_zip, output_directory): z.extractall(output_directory) z.close() +def xywh_from_bbox(minx, miny, maxx, maxy): + """Convert a bounding box from a numeric list to a numeric dict representation.""" + return { + 'x': minx, + 'y': miny, + 'w': maxx - minx, + 'h': maxy - miny, + } + def xywh_from_points(points): """ - Constructs an dict representing a rectangle with keys x, y, w, h + Construct a numeric dict representing a bounding box from polygon coordinates in page representation. """ xys = [[int(p) for p in pair.split(',')] for pair in points.split(' ')] minx = sys.maxsize diff --git a/ocrd_utils/requirements.txt b/ocrd_utils/requirements.txt new file mode 100644 index 0000000000..b4ccf8ef14 --- /dev/null +++ b/ocrd_utils/requirements.txt @@ -0,0 +1,2 @@ +Pillow >= 5.3.0 +numpy >= 1.17.0 diff --git a/ocrd_utils/setup.py b/ocrd_utils/setup.py index 3e6f5a775e..953219c1b7 100644 --- a/ocrd_utils/setup.py +++ b/ocrd_utils/setup.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- from setuptools import setup +install_requires = open('requirements.txt').read().split('\n') + setup( name='ocrd_utils', version='1.0.0b15', @@ -12,6 +14,7 @@ url='https://github.com/OCR-D/core', license='Apache License 2.0', packages=['ocrd_utils'], + install_requires=install_requires, package_data={'': ['*.json', '*.yml', '*.xml']}, keywords=['OCR', 'OCR-D'] ) diff --git a/tests/test_utils.py b/tests/test_utils.py index 35e2c4ff44..6bff8ba35d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,24 +1,36 @@ -from os import getcwd, chdir +from os import getcwd + +from PIL import Image + from tests.base import TestCase, main, assets from ocrd_utils import ( abspath, + + bbox_from_points, + bbox_from_xywh, + + concat_padded, is_local_filename, - points_from_xywh, is_string, - concat_padded, + membername, + + points_from_bbox, points_from_x0y0x1y1, - pushd_popd, + points_from_xywh, + points_from_polygon, + polygon_from_points, + polygon_from_x0y0x1y1, + xywh_from_points, + xywh_from_polygon, + pushd_popd, + ) from ocrd_models.utils import xmllint_format -from PIL import Image - class TestUtils(TestCase): - # def runTest(self): - def test_abspath(self): self.assertEqual(abspath('file:///'), '/') @@ -28,15 +40,41 @@ def test_is_local_filename(self): def test_points_from_xywh(self): self.assertEqual( points_from_xywh({'x': 100, 'y': 100, 'w': 100, 'h': 100}), - '100,100 200,100 200,200 100,200' - ) + '100,100 200,100 200,200 100,200') + + def test_points_from_bbox(self): + self.assertEqual( + points_from_bbox(100, 100, 200, 200), + '100,100 200,100 200,200 100,200') + + def test_points_from_polygon(self): + self.assertEqual( + points_from_polygon([[100, 100], [200, 100], [200, 200], [100, 200]]), + '100,100 200,100 200,200 100,200') + + def test_polygon_from_x0y0x1y1(self): + self.assertEqual( + polygon_from_x0y0x1y1([100, 100, 200, 200]), + [[100, 100], [200, 100], [200, 200], [100, 200]]) def test_points_from_x0y0x1y1(self): self.assertEqual( points_from_x0y0x1y1([100, 100, 200, 200]), - '100,100 200,100 200,200 100,200' - ) + '100,100 200,100 200,200 100,200') + + def test_bbox_from_points(self): + self.assertEqual( + bbox_from_points('100,100 200,100 200,200 100,200'), (100, 100, 200, 200)) + + def test_bbox_from_xywh(self): + self.assertEqual( + bbox_from_xywh({'x': 100, 'y': 100, 'w': 100, 'h': 100}), + (100, 100, 200, 200)) + def test_xywh_from_polygon(self): + self.assertEqual( + xywh_from_polygon([[100, 100], [200, 100], [200, 200], [100, 200]]), + {'x': 100, 'y': 100, 'w': 100, 'h': 100}) def test_xywh_from_points(self): self.assertEqual( @@ -67,6 +105,13 @@ def test_xmllint(self): pretty_xml = xmllint_format(xml_str).decode('utf-8') self.assertEqual(pretty_xml, '\n' + xml_str) + def test_membername(self): + class Klazz: + def __init__(self): + self.prop = 42 + instance = Klazz() + self.assertEqual(membername(instance, 42), 'prop') + def test_pil_version(self): """ Test segfault issue in PIL TiffImagePlugin