Skip to content

Commit

Permalink
🔥 remove commons, adapt to OCR-D/core#268
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Aug 7, 2019
1 parent c17341e commit 5d4a3ca
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 260 deletions.
3 changes: 2 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ disable =
too-many-locals,
too-few-public-methods,
wrong-import-order,
duplicate-code
duplicate-code,
fixme

This comment has been minimized.

Copy link
@bertsky

bertsky Aug 12, 2019

Really? (I thought that seeing those FIXMEs in the report one by one is a major reason for running pylint?)


# allow indented whitespace (as required by interpreter):
no-space-check=empty-line
Expand Down
23 changes: 9 additions & 14 deletions ocrd_tesserocr/binarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@

from ocrd_utils import (
getLogger, concat_padded,
MIMETYPE_PAGE,
membername
MIMETYPE_PAGE
)
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import (
Expand All @@ -22,11 +21,6 @@
from ocrd import Processor

from .config import TESSDATA_PREFIX, OCRD_TOOL
from .common import (
image_from_page,
image_from_segment,
save_image_file
)

TOOL = 'ocrd-tesserocr-binarize'
LOG = getLogger('processor.TesserocrBinarize')
Expand All @@ -52,6 +46,7 @@ def process(self):
Produce a new output file by serialising the resulting hierarchy.
"""
# pylint: disable=attribute-defined-outside-init
try:
self.page_grp, self.image_grp = self.output_file_grp.split(',')
except ValueError:
Expand All @@ -77,16 +72,16 @@ def process(self):
value=self.parameter[name])
for name in self.parameter.keys()])]))
page = pcgts.get_Page()
page_image, page_xywh, _ = image_from_page(
self.workspace, page, page_id)
page_image, page_xywh, _ = self.workspace.image_from_page(
page, page_id)
LOG.info("Binarizing on '%s' level in page '%s'", oplevel, page_id)

regions = page.get_TextRegion() + page.get_TableRegion()
if not regions:
LOG.warning("Page '%s' contains no text regions", page_id)
for region in regions:
region_image, region_xywh = image_from_segment(
self.workspace, region, page_image, page_xywh)
region_image, region_xywh = self.workspace.image_from_segment(
region, page_image, page_xywh)
if oplevel == 'region':
tessapi.SetPageSegMode(PSM.SINGLE_BLOCK)
self._process_segment(tessapi, RIL.BLOCK, region, region_image, region_xywh,
Expand All @@ -98,8 +93,8 @@ def process(self):
LOG.warning("Page '%s' region '%s' contains no text lines",
page_id, region.id)
for line in lines:
line_image, line_xywh = image_from_segment(
self.workspace, line, region_image, region_xywh)
line_image, line_xywh = self.workspace.image_from_segment(
line, region_image, region_xywh)
tessapi.SetPageSegMode(PSM.SINGLE_LINE)
self._process_segment(tessapi, RIL.TEXTLINE, line, line_image, line_xywh,
"line '%s'" % line.id, input_file.pageId,
Expand Down Expand Up @@ -129,7 +124,7 @@ def _process_segment(self, tessapi, ril, segment, image, xywh, where, page_id, f
LOG.error('Cannot binarize %s', where)
return
# update METS (add the image file):
file_path = save_image_file(self.workspace, image_bin,
file_path = self.workspace.save_image_file(image_bin,
file_id,
page_id=page_id,
file_grp=self.image_grp)
Expand Down
183 changes: 0 additions & 183 deletions ocrd_tesserocr/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,186 +18,3 @@
LOG = getLogger('') # to be refined by importer

This comment has been minimized.

Copy link
@bertsky

bertsky Aug 12, 2019

Could you delete the file altogether (to avoid confusion in the future)?



# to be refactored into core (as method of ocrd.workspace.Workspace):
# to be refactored into core (as method of ocrd.workspace.Workspace):
def image_from_page(workspace, page, page_id):
    """Extract the Page image from the workspace.

    Given a PageType object, `page`, extract its PIL.Image from
    AlternativeImage if it exists. Otherwise extract the PIL.Image
    from imageFilename and crop it if a Border exists. Otherwise
    just return it.

    When cropping, respect any orientation angle annotated for
    the page (from page-level deskewing) by rotating the
    cropped image, respectively.

    If the resulting page image is larger than the bounding box of
    `page`, pass down the page's box coordinates with an offset of
    half the width/height difference.

    Args:
        workspace: object providing `resolve_image_as_pil(filename)`.
        page: PAGE page object (provides `imageFilename`,
            `get_orientation()`, `get_Border()`, `get_PrintSpace()`
            and `get_AlternativeImage()`).
        page_id: page identifier, used for log messages only.

    Returns:
        A tuple `(page_image, page_xywh, page_image_info)`: the extracted
        image, the coordinates of the page's bounding box / border as a
        dict with keys `x`, `y`, `w`, `h` and `angle` (for passing down),
        and an OcrdExif instance built from the original image.
    """
    page_image = workspace.resolve_image_as_pil(page.imageFilename)
    # the image info always describes the original image, even if an
    # AlternativeImage is returned below:
    page_image_info = OcrdExif(page_image)
    # FIXME: remove PrintSpace here as soon as GT abides by the PAGE standard:
    border = page.get_Border() or page.get_PrintSpace()
    if border:
        page_points = border.get_Coords().points
        LOG.debug("Using explictly set page border '%s' for page '%s'",
                  page_points, page_id)
        page_xywh = xywh_from_points(page_points)
    else:
        page_xywh = {'x': 0,
                     'y': 0,
                     'w': page_image.width,
                     'h': page_image.height}
    # page angle: PAGE orientation is defined clockwise,
    # whereas PIL/ndimage rotation is in mathematical direction.
    # This must be annotated AFTER the box has been chosen: setting it
    # before and then replacing the box by the border's coordinates
    # silently dropped the angle, so bordered pages were never rotated.
    page_xywh['angle'] = -(page.get_orientation() or 0)

    alternative_image = page.get_AlternativeImage()
    if alternative_image:
        # (e.g. from page-level cropping, binarization, deskewing or despeckling)
        # assumes implicit cropping (i.e. page_xywh has been applied already)
        LOG.debug("Using AlternativeImage %d (%s) for page '%s'",
                  len(alternative_image), alternative_image[-1].get_comments(),
                  page_id)
        page_image = workspace.resolve_image_as_pil(
            alternative_image[-1].get_filename())
    elif border:
        # get polygon outline of page border:
        page_polygon = np.array(polygon_from_points(page_points))
        # create a mask from the page polygon:
        page_image = image_from_polygon(page_image, page_polygon)
        # recrop into page rectangle:
        page_image = crop_image(page_image,
                                box=(page_xywh['x'],
                                     page_xywh['y'],
                                     page_xywh['x'] + page_xywh['w'],
                                     page_xywh['y'] + page_xywh['h']))
        if page_xywh['angle']:
            LOG.info("About to rotate page '%s' by %.2f°",
                     page_id, page_xywh['angle'])
            page_image = page_image.rotate(page_xywh['angle'],
                                           expand=True,
                                           #resample=Image.BILINEAR,
                                           fillcolor='white')
    # subtract offset from any increase in binary region size over source:
    page_xywh['x'] -= round(0.5 * max(0, page_image.width - page_xywh['w']))
    page_xywh['y'] -= round(0.5 * max(0, page_image.height - page_xywh['h']))
    return page_image, page_xywh, page_image_info

# to be refactored into core (as method of ocrd.workspace.Workspace):
def image_from_segment(workspace, segment, parent_image, parent_xywh):
    """Extract a segment image from its parent's image.

    `segment` is a PAGE segment (TextRegion / TextLine / Word / Glyph)
    logically contained in the parent whose PIL.Image is `parent_image`
    and whose coordinates are `parent_xywh`.

    If the segment references an AlternativeImage, the last one is
    loaded from the workspace and returned as is. Otherwise the segment
    is cut out of `parent_image`: its polygon (made relative to the
    parent image by `coordinates_of_segment`) is used as a mask, the
    result is cropped to the segment's bounding box, and finally rotated
    by the segment's own orientation angle, if one is annotated.

    If the resulting image is larger than the segment's bounding box,
    the returned box origin is shifted by half the width/height
    difference.

    Args:
        workspace: object providing `resolve_image_as_pil(filename)`.
        segment: PAGE segment object.
        parent_image: PIL.Image of the parent.
        parent_xywh: coordinates of the parent (keys `x` and `y` are
            read here; the whole dict is handed to
            `coordinates_of_segment`).

    Returns:
        A tuple `(segment_image, segment_xywh)`: the extracted image and
        the coordinates of the segment's bounding box (for passing down).
    """
    segment_xywh = xywh_from_points(segment.get_Coords().points)
    if 'orientation' in segment.__dict__:
        # PAGE counts orientation clockwise, PIL/ndimage rotate
        # counter-clockwise, hence the sign flip:
        segment_xywh['angle'] = -(segment.get_orientation() or 0)

    alternatives = segment.get_AlternativeImage()
    if not alternatives:
        # polygon outline of the segment, relative to the parent image:
        outline = coordinates_of_segment(segment, parent_image, parent_xywh)
        # mask everything outside of that polygon:
        masked = image_from_polygon(parent_image, outline)
        # cut down to the segment's rectangle:
        left = segment_xywh['x'] - parent_xywh['x']
        top = segment_xywh['y'] - parent_xywh['y']
        segment_image = crop_image(masked,
                                   box=(left,
                                        top,
                                        left + segment_xywh['w'],
                                        top + segment_xywh['h']))
        # note: Overlapping neighbouring segments are deliberately NOT
        # masked here. On raw (non-binary) data it is hard to decide, for
        # each intersection, whether it belongs to the segment or to its
        # neighbour (an enclosing ImageRegion should be ignored, an enclosed
        # one should be masked, partial overlap is unclear). On binary data,
        # connected component analysis could do it, but that would require
        # either binarized input or some ad-hoc binarization. A dedicated
        # processor (producing a clipped AlternativeImage or reduced polygon
        # coordinates) is the preferable place for this.
        angle = segment_xywh.get('angle')
        if angle:
            LOG.info("About to rotate segment '%s' by %.2f°",
                     segment.id, angle)
            segment_image = segment_image.rotate(angle,
                                                 expand=True,
                                                 #resample=Image.BILINEAR,
                                                 fillcolor='white')
    else:
        # (e.g. from segment-level cropping, binarization, deskewing
        # or despeckling) - always take the most recent one:
        latest = alternatives[-1]
        LOG.debug("Using AlternativeImage %d (%s) for segment '%s'",
                  len(alternatives), latest.get_comments(),
                  segment.id)
        segment_image = workspace.resolve_image_as_pil(latest.get_filename())

    # compensate for any growth of the image over the annotated box:
    extra_width = max(0, segment_image.width - segment_xywh['w'])
    extra_height = max(0, segment_image.height - segment_xywh['h'])
    segment_xywh['x'] -= round(0.5 * extra_width)
    segment_xywh['y'] -= round(0.5 * extra_height)
    return segment_image, segment_xywh

# to be refactored into core (as method of ocrd.workspace.Workspace):
def save_image_file(workspace, image,
                    file_id,
                    page_id=None,
                    file_grp='OCR-D-IMG',  # or -BIN?
                    format='PNG',
                    force=True):
    """Store and reference an image as file into the workspace.

    Serialise the PIL.Image `image` in the given `format` and register
    it in the workspace under the ID `file_id`, the fileGrp `file_grp`
    and the physical page `page_id`. The file name is derived from
    `file_grp`, `file_id` and the lower-cased `format` as extension;
    the MIME type is `image/` plus the lower-cased `format`.

    Args:
        workspace: object providing `add_file(...)`.
        image: PIL.Image to store.
        file_id: ID to use for the file in METS.
        page_id: physical page to attach the file to.
        file_grp: fileGrp to store the file under.
        format: image format name, passed on to `image.save`.
        force: passed on to `workspace.add_file`.

    Returns:
        The path of the created file, i.e. `file_grp` joined with
        the file name.
    """
    suffix = format.lower()
    file_path = os.path.join(file_grp, '.'.join((file_id, suffix)))
    # encode the image in memory, so the workspace decides where to write it:
    buffer = io.BytesIO()
    image.save(buffer, format=format)
    payload = buffer.getvalue()
    out = workspace.add_file(
        ID=file_id,
        file_grp=file_grp,
        pageId=page_id,
        local_filename=file_path,
        mimetype='image/' + suffix,
        content=payload,
        force=force)
    LOG.info('created file ID: %s, file_grp: %s, path: %s',
             file_id, file_grp, out.local_filename)
    return file_path
7 changes: 2 additions & 5 deletions ocrd_tesserocr/crop.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@
from ocrd import Processor

from .config import TESSDATA_PREFIX, OCRD_TOOL
from .common import (
save_image_file
)

TOOL = 'ocrd-tesserocr-crop'
LOG = getLogger('processor.TesserocrCrop')
Expand Down Expand Up @@ -113,7 +110,7 @@ def process(self):
# iterate over all text blocks and compare their
# bbox extent to the running min and max values
for component in tessapi.GetComponentImages(tesserocr.RIL.BLOCK, True):
image, xywh, index, para = component
image, xywh, index, _ = component
#
# the region reference in the reading order element
#
Expand Down Expand Up @@ -163,7 +160,7 @@ def process(self):
file_id = input_file.ID.replace(self.input_file_grp, FILEGRP_IMG)
if file_id == input_file.ID:
file_id = concat_padded(FILEGRP_IMG, n)
file_path = save_image_file(self.workspace, page_image,
file_path = self.workspace.save_image_file(page_image,
file_id,
page_id=page_id,
file_grp=FILEGRP_IMG)
Expand Down
23 changes: 9 additions & 14 deletions ocrd_tesserocr/deskew.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,6 @@
from ocrd import Processor

from .config import TESSDATA_PREFIX, OCRD_TOOL
from .common import (
image_from_page,
image_from_segment,
save_image_file,
)

TOOL = 'ocrd-tesserocr-deskew'
LOG = getLogger('processor.TesserocrDeskew')
Expand All @@ -45,19 +40,19 @@ def __init__(self, *args, **kwargs):

def process(self):
"""Performs deskewing of the page / region with Tesseract on the workspace.
Open and deserialise PAGE input files and their respective images,
then iterate over the element hierarchy down to the region level
for all text and table regions.
Set up Tesseract to recognise the region image's orientation, skew
and script (with both OSD and AnalyseLayout). Rotate the image
accordingly, and annotate the angle, readingDirection and textlineOrder.
Create a corresponding image file, and reference it as AlternativeImage
in the region element and as file with a fileGrp USE `OCR-D-IMG-DESKEW`
in the workspace.
Produce a new output file by serialising the resulting hierarchy.
"""
oplevel = self.parameter['operation_level']
Expand All @@ -84,8 +79,8 @@ def process(self):
value=self.parameter[name])
for name in self.parameter.keys()])]))
page = pcgts.get_Page()
page_image, page_xywh, page_image_info = image_from_page(
self.workspace, page, page_id)
page_image, page_xywh, page_image_info = self.workspace.image_from_page(
page, page_id)
if page_image_info.xResolution != 1:
dpi = page_image_info.xResolution
if page_image_info.resolutionUnit == 'cm':
Expand All @@ -102,8 +97,8 @@ def process(self):
if not regions:
LOG.warning("Page '%s' contains no text regions", page_id)
for region in regions:
region_image, region_xywh = image_from_segment(
self.workspace, region, page_image, page_xywh)
region_image, region_xywh = self.workspace.image_from_segment(
region, page_image, page_xywh)
self._process_segment(tessapi, region, region_image, region_xywh,
"region '%s'" % region.id, input_file.pageId,
file_id + '_' + region.id)
Expand Down Expand Up @@ -269,7 +264,7 @@ def _process_segment(self, tessapi, segment, image, xywh, where, page_id, file_i
# points = points_from_x0y0x1y1(list(baseline[0]) + list(baseline[1]))
# segment.add_Baseline(BaselineType(points=points))
# update METS (add the image file):
file_path = save_image_file(self.workspace, image,
file_path = self.workspace.save_image_file(image,
file_id,
page_id=page_id,
file_grp=FILEGRP_IMG)
Expand Down
Loading

0 comments on commit 5d4a3ca

Please sign in to comment.