Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adapt to utils moved to core, #49 #66

Merged
merged 6 commits into from
Aug 21, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ocrd_tesserocr/binarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from ocrd_utils import (
getLogger, concat_padded,
MIMETYPE_PAGE
MIMETYPE_PAGE,
membername
)
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import (
Expand All @@ -24,8 +25,7 @@
from .common import (
image_from_page,
image_from_segment,
save_image_file,
membername
save_image_file
)

TOOL = 'ocrd-tesserocr-binarize'
Expand Down
226 changes: 8 additions & 218 deletions ocrd_tesserocr/common.py
Original file line number Diff line number Diff line change
@@ -1,151 +1,24 @@
from __future__ import absolute_import

import os.path
import sys
import io

import numpy as np
from PIL import Image, ImageDraw, ImageStat

from ocrd_models import OcrdExif
from ocrd_utils import getLogger, xywh_from_points, polygon_from_points
from ocrd_utils import (
getLogger,
coordinates_of_segment,
xywh_from_points,
polygon_from_points,
image_from_polygon,
crop_image,
)

LOG = getLogger('') # to be refined by importer

# to be refactored into core (as function in ocrd_utils):
def polygon_mask(image, coordinates):
    """Create a mask image of a polygon.

    Given a PIL.Image `image` (merely for dimensions), and
    a numpy array (or list of points) `coordinates` relative to
    the image, create a new image of the same size with black
    background, and fill everything inside the polygon, including
    its outline, with white.

    Return the new PIL.Image (mode 'L').
    """
    mask = Image.new('L', image.size, 0)
    if isinstance(coordinates, np.ndarray):
        # hand the points to ImageDraw as a plain list of tuples
        coordinates = list(map(tuple, coordinates))
    # The outline must be white, too: in an 'L' image the value 1 is
    # almost black, so `outline=1` would mask out the polygon's edge.
    ImageDraw.Draw(mask).polygon(coordinates, outline=255, fill=255)
    return mask

# to be refactored into core (as function in ocrd_utils):
def image_from_polygon(image, polygon):
    """Mask an image with a polygon.

    Given a PIL.Image `image` and a numpy array `polygon` of
    coordinates relative to the image, keep only what lies inside
    the polygon and replace the rest by the background. As `image`
    may not be binarized yet, the background is taken from the
    median of its first band (instead of assuming white).

    Return a new PIL.Image (mode 'L') of the same size.
    """
    inside = polygon_mask(image, polygon)
    # median colour stands in for the (unknown) page background
    median_level = ImageStat.Stat(image).median[0]
    canvas = Image.new('L', image.size, median_level)
    # copy only the pixels which the mask lets through
    canvas.paste(image, mask=inside)
    return canvas

# to be refactored into core (as function in ocrd_utils):
def crop_image(image, box=None):
    """Crop an image to a rectangle, filling with background.

    Given a PIL.Image `image` and a list `box` of the bounding
    rectangle (minx, miny, maxx, maxy) relative to the image, crop
    at the box coordinates, filling everything outside `image` with
    the background. (This covers the case where `box` indexes are
    negative or larger than `image` width/height. PIL.Image.crop
    would fill with black.) Since `image` is not necessarily
    binarized yet, determine the background from the median color
    (instead of white). Without `box`, the full image area is used.

    Return a new PIL.Image of the same mode as `image`.
    """
    # todo: perhaps we should issue a warning if we encounter this
    #       (It should be invalid in PAGE-XML to extend beyond parents.)
    if not box:
        box = (0, 0, image.width, image.height)
    xywh = xywh_from_bbox(*box)
    median = ImageStat.Stat(image).median
    # A bare number is only a proper colour for single-band images;
    # multi-band modes (e.g. RGB) need one value per band, otherwise
    # the fill colour would not be the median colour of the image.
    background = median[0] if len(median) == 1 else tuple(median)
    new_image = Image.new(image.mode, (xywh['w'], xywh['h']),
                          background)  # or 'white'
    # a negative offset shifts the box origin to the new image's origin
    new_image.paste(image, (-xywh['x'], -xywh['y']))
    return new_image

# to be refactored into core (as function in ocrd_utils):
def rotate_coordinates(polygon, angle, orig=np.array([0, 0])):
    """Apply a passive rotation transformation to the given coordinates.

    Given a numpy array `polygon` of points, a rotation `angle`
    in degrees, and a numpy array `orig` for the center of rotation,
    compute where the points end up when the underlying image is
    rotated by `angle` around `orig`: translate to the center,
    apply the inverse rotation, and translate back.

    Return a numpy array of the resulting polygon.
    """
    radians = np.deg2rad(angle)
    c, s = np.cos(radians), np.sin(radians)
    # active rotation would be [[c, -s], [s, c]];
    # the passive one is its inverse:
    passive = np.array([[c, s], [-s, c]])
    centered = polygon - orig
    return orig + np.dot(centered, passive.transpose())

# to be refactored into core (as method of ocrd.workspace.Workspace):
wrznr marked this conversation as resolved.
Show resolved Hide resolved
def coordinates_of_segment(segment, parent_image, parent_xywh):
    """Extract the relative coordinates polygon of a PAGE segment element.

    Given a Region / TextLine / Word / Glyph `segment` and
    the PIL.Image of its parent Page / Region / TextLine / Word
    along with its bounding box `parent_xywh`, calculate the
    coordinates of the segment relative to that image: subtract
    the parent's offset from all points and, if `parent_xywh` has
    an 'angle' entry, rotate them around the center of the image.

    Return the rounded numpy array (int32) of the resulting polygon.
    """
    # absolute outline as stated in the segment's Coords element
    outline = polygon_from_points(segment.get_Coords().points)
    polygon = np.array(outline)
    # make the points relative to the parent's top-left corner
    offset = np.array([parent_xywh['x'], parent_xywh['y']])
    polygon -= offset
    if 'angle' in parent_xywh:
        # the parent image was rotated, so the points must follow
        center = np.array([0.5 * parent_image.width,
                           0.5 * parent_image.height])
        polygon = rotate_coordinates(
            polygon, parent_xywh['angle'], orig=center)
    return np.round(polygon).astype(np.int32)

# to be refactored into core (as method of ocrd.workspace.Workspace):
def coordinates_for_segment(polygon, parent_image, parent_xywh):
    """Convert a relative coordinates polygon to absolute.

    Given a numpy array (or nested list) `polygon` of points, and a
    parent PIL.Image along with its bounding box `parent_xywh` to
    which the coordinates are relative, calculate the absolute
    coordinates within the page. That is, (in case `parent_xywh`
    has an 'angle' entry,) rotate all points in opposite direction
    with the center of the image as origin, then shift all points
    by the offset of the parent.

    The input `polygon` is left unmodified.

    Return the rounded numpy array (uint32) of the resulting polygon.
    """
    polygon = np.asarray(polygon)
    # angle correction (unrotate coordinates if image has been rotated):
    if 'angle' in parent_xywh:
        polygon = rotate_coordinates(
            polygon, -parent_xywh['angle'],
            orig=np.array([0.5 * parent_image.width,
                           0.5 * parent_image.height]))
    # offset correction (shift coordinates from base of segment);
    # done out-of-place, because `+=` would write into the caller's
    # array whenever no rotation created a fresh one above:
    polygon = polygon + np.array([parent_xywh['x'], parent_xywh['y']])
    # NOTE(review): uint32 cannot represent negative coordinates;
    # kept because callers may rely on the dtype -- TODO confirm
    return np.round(polygon).astype(np.uint32)

# to be refactored into core (as method of ocrd.workspace.Workspace):
def image_from_page(workspace, page, page_id):
"""Extract the Page image from the workspace.
Expand Down Expand Up @@ -328,86 +201,3 @@ def save_image_file(workspace, image,
LOG.info('created file ID: %s, file_grp: %s, path: %s',
file_id, file_grp, out.local_filename)
return file_path

# to be refactored into core (as function in ocrd_utils):
def bbox_from_points(points):
    """Build a bounding box (minx, miny, maxx, maxy) from a PAGE points string.

    `points` is a string of space-separated "x,y" pairs.
    """
    polygon = []
    for pair in points.split(' '):
        polygon.append([int(number) for number in pair.split(',')])
    return bbox_from_polygon(polygon)

# to be refactored into core (as function in ocrd_utils):
def points_from_bbox(minx, miny, maxx, maxy):
    """Build a PAGE points string from the corners of a bounding box.

    The corners are listed starting at (minx, miny), then
    (maxx, miny), (maxx, maxy) and (minx, maxy).
    """
    corners = (
        (minx, miny),
        (maxx, miny),
        (maxx, maxy),
        (minx, maxy),
    )
    return " ".join("%i,%i" % corner for corner in corners)

# to be refactored into core (as function in ocrd_utils):
def xywh_from_bbox(minx, miny, maxx, maxy):
    """Turn a bounding box given as four numbers into a dict.

    The dict has the keys 'x' and 'y' (top-left corner) as well as
    'w' and 'h' (extent along each axis).
    """
    width = maxx - minx
    height = maxy - miny
    return dict(x=minx, y=miny, w=width, h=height)

# to be refactored into core (as function in ocrd_utils):
def bbox_from_xywh(xywh):
    """Turn a bounding box dict (keys 'x', 'y', 'w', 'h') into a tuple.

    Return the tuple (minx, miny, maxx, maxy).
    """
    left, top = xywh['x'], xywh['y']
    right = left + xywh['w']
    bottom = top + xywh['h']
    return (left, top, right, bottom)

# to be refactored into core (as function in ocrd_utils):
def points_from_polygon(polygon):
    """Serialize a list of (x, y) points into a PAGE points string.

    The points are written as "x,y" pairs separated by spaces.
    """
    pairs = []
    for x, y in polygon:
        pairs.append("%i,%i" % (x, y))
    return " ".join(pairs)

# to be refactored into core (as function in ocrd_utils):
def xywh_from_polygon(polygon):
    """Build a bounding box dict ('x', 'y', 'w', 'h') from a list of points."""
    minx, miny, maxx, maxy = bbox_from_polygon(polygon)
    return xywh_from_bbox(minx, miny, maxx, maxy)

# to be refactored into core (as function in ocrd_utils):
def polygon_from_xywh(xywh):
    """Build the list of corner points from a bounding box dict ('x', 'y', 'w', 'h')."""
    minx, miny, maxx, maxy = bbox_from_xywh(xywh)
    return polygon_from_bbox(minx, miny, maxx, maxy)

# to be refactored into core (as function in ocrd_utils):
def bbox_from_polygon(polygon):
    """Construct a bounding box from polygon coordinates in numeric list representation.

    Given an iterable `polygon` of (x, y) points, return the tuple
    (minx, miny, maxx, maxy) of the smallest and largest values
    found along each axis. Negative coordinates are handled as well.

    For an empty polygon, the result is the historic sentinel
    (sys.maxsize, sys.maxsize, 0, 0).
    """
    points = list(polygon)
    if not points:
        # nothing to measure: keep what callers used to get
        return sys.maxsize, sys.maxsize, 0, 0
    xs = [xy[0] for xy in points]
    ys = [xy[1] for xy in points]
    # take the extrema from the data itself; starting the maxima
    # at 0 would yield a wrong box for negative coordinates
    return min(xs), min(ys), max(xs), max(ys)

# to be refactored into core (as function in ocrd_utils):
def polygon_from_bbox(minx, miny, maxx, maxy):
    """Build the list of corner points of a bounding box.

    The corners are listed starting at [minx, miny], then
    [maxx, miny], [maxx, maxy] and [minx, maxy].
    """
    first = [minx, miny]
    second = [maxx, miny]
    third = [maxx, maxy]
    fourth = [minx, maxy]
    return [first, second, third, fourth]

# to be refactored into core (as function in ocrd_utils):
def polygon_from_x0y0x1y1(x0y0x1y1):
    """Build the list of corner points from a bounding box given as four strings.

    The first four items of `x0y0x1y1` are converted with int() and
    taken as minx, miny, maxx, maxy. The corners are listed starting
    at [minx, miny], then [maxx, miny], [maxx, maxy] and [minx, maxy].
    """
    minx, miny, maxx, maxy = [int(x0y0x1y1[index]) for index in range(4)]
    return [
        [minx, miny],
        [maxx, miny],
        [maxx, maxy],
        [minx, maxy],
    ]

def membername(class_, val):
    """Look up the name under which `val` is stored in `class_`.

    Search the `__dict__` of `class_` for the first entry which
    equals `val` and return its name; if there is none, return
    `str(val)` instead.
    """
    for name, member in class_.__dict__.items():
        if member == val:
            return name
    return str(val)
4 changes: 2 additions & 2 deletions ocrd_tesserocr/crop.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import tesserocr
from ocrd_utils import (
getLogger, concat_padded,
bbox_from_points, points_from_bbox, bbox_from_xywh,
MIMETYPE_PAGE
)
from ocrd_modelfactory import page_from_file
Expand All @@ -19,8 +20,7 @@

from .config import TESSDATA_PREFIX, OCRD_TOOL
from .common import (
bbox_from_points, points_from_bbox,
bbox_from_xywh, save_image_file
save_image_file
)

TOOL = 'ocrd-tesserocr-crop'
Expand Down
2 changes: 1 addition & 1 deletion ocrd_tesserocr/deskew.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from ocrd_utils import (
getLogger, concat_padded,
membername,
MIMETYPE_PAGE
)
from ocrd_modelfactory import page_from_file
Expand All @@ -29,7 +30,6 @@
image_from_page,
image_from_segment,
save_image_file,
membername
)

TOOL = 'ocrd-tesserocr-deskew'
Expand Down
17 changes: 10 additions & 7 deletions ocrd_tesserocr/recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,17 @@
PyTessBaseAPI, get_languages)

from ocrd_utils import (
getLogger, concat_padded,
getLogger,
concat_padded,
points_from_x0y0x1y1,
xywh_from_points, points_from_xywh,
MIMETYPE_PAGE)
xywh_from_points,
points_from_xywh,
points_from_polygon,
xywh_from_polygon,
polygon_from_x0y0x1y1,
coordinates_for_segment,
MIMETYPE_PAGE
)
from ocrd_models.ocrd_page import (
CoordsType,
GlyphType, WordType,
Expand All @@ -22,10 +29,6 @@

from .config import TESSDATA_PREFIX, OCRD_TOOL
from .common import (
points_from_polygon,
xywh_from_polygon,
polygon_from_x0y0x1y1,
coordinates_for_segment,
image_from_page,
image_from_segment
)
Expand Down
18 changes: 9 additions & 9 deletions ocrd_tesserocr/segment_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@
)

from ocrd_utils import (
getLogger, concat_padded,
getLogger,
concat_padded,
points_from_x0y0x1y1,
points_from_xywh, xywh_from_points,
MIMETYPE_PAGE)
points_from_xywh,
xywh_from_points,
MIMETYPE_PAGE,
points_from_polygon,
membername
)
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import (
MetadataItemType,
Expand All @@ -29,12 +34,7 @@
from ocrd import Processor

from .config import TESSDATA_PREFIX, OCRD_TOOL
from .common import (
image_from_page,
save_image_file,
points_from_polygon,
membername
)
from .common import save_image_file, image_from_page

TOOL = 'ocrd-tesserocr-segment-region'
LOG = getLogger('processor.TesserocrSegmentRegion')
Expand Down