Skip to content

Commit

Permalink
Merge branch 'master' into fix-rotation
Browse files Browse the repository at this point in the history
  • Loading branch information
kba authored Oct 22, 2019
2 parents 27140d8 + e71eff2 commit 5b05737
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 21 deletions.
6 changes: 2 additions & 4 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
*
!ocrd
!ocrd*
!Makefile
!setup.py
!requirements.txt
!LICENSE
!README.rst
!README.md
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Versioned according to [Semantic Versioning](http://semver.org/).
## Unreleased

* image_from_page etc: allow filling with background or transparency

## [1.0.0] - 2019-10-18

* Workspace validation: Validate that files mentioned in pc:Page/@imageFilename exist in METS and on FS, #309
* `ocrd ocrd-tool parse-params` uses the same string-or-filepath logic for -p/--parameter as the [CLI](https://ocr-d.github.io/cli#-p---parameter-param_json)

Expand Down Expand Up @@ -588,6 +591,9 @@ Fixed
Initial Release

<!-- link-labels -->
[1.0.0]: ../../compare/v1.0.0...v1.0.0b19
[1.0.0b19]: ../../compare/v1.0.0b19...v1.0.0b18
[1.0.0b18]: ../../compare/v1.0.0b18...v1.0.0b17
[1.0.0b17]: ../../compare/v1.0.0b17...v1.0.0b16
[1.0.0b16]: ../../compare/v1.0.0b16...v1.0.0b15
[1.0.0b15]: ../../compare/v1.0.0b15...v1.0.0b14
Expand Down
20 changes: 11 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:19.10
MAINTAINER OCR-D
ENV DEBIAN_FRONTEND noninteractive
ENV PYTHONIOENCODING utf8
Expand All @@ -7,23 +7,25 @@ ENV LANG C.UTF-8

WORKDIR /build-ocrd
COPY ocrd ./ocrd
COPY ocrd_modelfactory ./ocrd_modelfactory/
COPY ocrd_models ./ocrd_models
COPY ocrd_utils ./ocrd_utils
COPY ocrd_validators/ ./ocrd_validators
COPY Makefile .
COPY setup.py .
COPY requirements.txt .
COPY README.rst .
COPY README.md .
COPY LICENSE .
RUN apt-get update && \
apt-get -y install --no-install-recommends \
ca-certificates \
make \
sudo \
git \
libglib2.0.0 \
libsm6 \
libxrender1 \
libxext6
libglib2.0.0
# libxext6
# libsm6 \
# libxrender1 \
RUN make deps-ubuntu
RUN pip3 install --upgrade pip
RUN make deps install
RUN make deps-ubuntu install

ENTRYPOINT ["/usr/local/bin/ocrd"]
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ BUILD_ORDER = ocrd_utils ocrd_models ocrd_modelfactory ocrd_validators ocrd

FIND_VERSION = grep version= ocrd_utils/setup.py|grep -Po "([0-9ab]+\.?)+"

# Additional arguments to docker build. Default: '$(DOCKER_ARGS)'
DOCKER_ARGS =

# BEGIN-EVAL makefile-parser --make-help Makefile

help:
Expand Down Expand Up @@ -44,6 +47,7 @@ help:
@echo " Variables"
@echo ""
@echo " PAGE_VERSION PAGE schema version to use. Default: '$(PAGE_VERSION)'"
@echo " DOCKER_ARGS Additional arguments to docker build. Default: '$(DOCKER_ARGS)'"
@echo " DOCKER_TAG Docker tag."
@echo " PIP_INSTALL pip install command. Default: $(PIP_INSTALL)"

Expand All @@ -57,7 +61,7 @@ PIP_INSTALL = pip install

# Dependencies for deployment in an ubuntu/debian linux
deps-ubuntu:
sudo apt install -y python3 python3-pip
sudo apt-get install -y python3 python3-pip

# Install test python deps via pip
deps-test:
Expand Down Expand Up @@ -181,7 +185,7 @@ pyclean:

# Build docker image
docker:
docker build -t $(DOCKER_TAG) .
docker build -t $(DOCKER_TAG) $(DOCKER_ARGS) .

#
# bash library
Expand Down
2 changes: 1 addition & 1 deletion ocrd/ocrd/cli/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def workspace_cli(ctx, directory, mets_basename, backup):
''')
@pass_workspace
@click.option('-a', '--download', is_flag=True, help="Download all files")
@click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(['mets_unique_identifier', 'mets_file_group_names', 'mets_files', 'pixel_density', 'page', 'url']))
@click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(['imagefilename', 'dimension', 'mets_unique_identifier', 'mets_file_group_names', 'mets_files', 'pixel_density', 'page', 'url']))
@click.option('--page-strictness', help="How strict to check PAGE consistency", type=click.Choice(['strict', 'lax', 'fix', 'off']), default='strict')
@click.argument('mets_url')
def validate_workspace(ctx, mets_url, download, skip, page_strictness):
Expand Down
2 changes: 1 addition & 1 deletion ocrd/ocrd/cli/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def spill(dest, src):
"""
resolver = Resolver()
workspace_bagger = WorkspaceBagger(resolver)
workspace = workspace_bagger.spill(src, directory)
workspace = workspace_bagger.spill(src, dest)
print(workspace)

# ----------------------------------------------------------------------
Expand Down
36 changes: 33 additions & 3 deletions ocrd/ocrd/workspace_bagger.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from os import makedirs, chdir, walk, getcwd
from os import makedirs, chdir, walk
from os.path import join, isdir, basename, exists, relpath
from shutil import make_archive, rmtree, copyfile, move
from tempfile import mkdtemp
Expand All @@ -16,9 +16,12 @@
is_local_filename,
unzip_file_to_dir,

MIMETYPE_PAGE,
VERSION,
)
from ocrd_validators.constants import BAGIT_TXT, TMP_BAGIT_PREFIX, OCRD_BAGIT_PROFILE_URL
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import to_xml

from .workspace import Workspace

Expand Down Expand Up @@ -58,9 +61,11 @@ def _log_or_raise(self, msg):

def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes):
mets = workspace.mets
changed_urls = {}

# TODO allow filtering by fileGrp@USE and such
with pushd_popd(workspace.directory):
# URLs of the files before changing
for f in mets.find_files():
log.info("Resolving %s (%s)", f.url, ocrd_manifestation_depth)
if is_local_filename(f.url):
Expand All @@ -77,15 +82,40 @@ def _bag_mets_files(self, workspace, bagdir, ocrd_manifestation_depth, ocrd_mets
file_grp_dir = join(bagdir, 'data', f.fileGrp)
if not isdir(file_grp_dir):
makedirs(file_grp_dir)
self.resolver.download_to_directory(file_grp_dir, f.url, basename="%s%s" % (f.ID, f.extension))
f.url = join(f.fileGrp, f.ID + f.extension)

_basename = "%s%s" % (f.ID, f.extension)
_relpath = join(f.fileGrp, _basename)
self.resolver.download_to_directory(file_grp_dir, f.url, basename=_basename)
changed_urls[f.url] = _relpath
f.url = _relpath

# save mets.xml
with open(join(bagdir, 'data', ocrd_mets), 'wb') as f:
f.write(workspace.mets.to_xml())

# Walk through bagged workspace and fix the PAGE
# Page/@imageFilename and
# AlternativeImage/@filename
bag_workspace = Workspace(self.resolver, directory=join(bagdir, 'data'))
with pushd_popd(bag_workspace.directory):
for page_file in bag_workspace.mets.find_files(mimetype=MIMETYPE_PAGE):
pcgts = page_from_file(page_file)
changed = False
# page_doc.set(imageFileName
# for old, new in changed_urls:
for old, new in changed_urls.items():
if pcgts.get_Page().imageFilename == old:
pcgts.get_Page().imageFilename = new
changed = True
# TODO replace AlternativeImage, recursively...
if changed:
with open(page_file.url, 'w') as out:
out.write(to_xml(pcgts))
# log.info("Replace %s -> %s in %s" % (old, new, page_file))

chdir(bagdir)
total_bytes, total_files = make_manifests('data', processes, algorithms=['sha512'])
log.info("New vs. old: %s" % changed_urls)
return total_bytes, total_files

def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_manifestation_depth, ocrd_base_version_checksum):
Expand Down
2 changes: 1 addition & 1 deletion ocrd_utils/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='ocrd_utils',
version='1.0.0b19',
version='1.0.0',
description='OCR-D framework - shared code, helpers, constants',
long_description=open('README.md').read(),
long_description_content_type='text/markdown',
Expand Down

0 comments on commit 5b05737

Please sign in to comment.