From 6fb3034dce91f381c1557995bd0910e1d674e647 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Aug 2019 15:52:11 +0200 Subject: [PATCH 1/3] atomic write of mets.xml, fix #278 --- ocrd/ocrd/workspace.py | 5 +++-- ocrd/requirements.txt | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 0cca59d8a9..da818fc53d 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -5,6 +5,7 @@ import cv2 from PIL import Image import numpy as np +from atomicwrites import atomic_write from ocrd_models import OcrdMets, OcrdExif from ocrd_utils import getLogger, is_local_filename, abspath, pushd_popd @@ -170,8 +171,8 @@ def save_mets(self): log.info("Saving mets '%s'" % self.mets_target) if self.automatic_backup: WorkspaceBackupManager(self).add() - with open(self.mets_target, 'wb') as f: - f.write(self.mets.to_xml(xmllint=True)) + with atomic_write(self.mets_target, overwrite=True) as f: + f.write(self.mets.to_xml(xmllint=True).decode('utf-8')) def resolve_image_exif(self, image_url): """ diff --git a/ocrd/requirements.txt b/ocrd/requirements.txt index 31601f2e1f..6a2f8eb132 100644 --- a/ocrd/requirements.txt +++ b/ocrd/requirements.txt @@ -9,3 +9,4 @@ opencv-python-headless Flask jsonschema pyyaml +atomicwrites >= 1.3.0 From a3233a9183317f03c7f013d4fe868e8c833c39a8 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 15 Aug 2019 16:21:33 +0200 Subject: [PATCH 2/3] :memo: changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61fadb98a6..1a7ec9b228 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Versioned according to [Semantic Versioning](http://semver.org/). Changed: * workspace bagger will create files with extension + * `save_mets` is atomic now, #278 ## [1.0.0b15] - 2019-08-14 From 39cce9fa34f21353110ed12bbd1f99bb26e737b6 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 20 Aug 2019 16:28:49 +0200 Subject: [PATCH 3/3] WorkspaceBackupManager.add is atomic now, #278 --- CHANGELOG.md | 2 +- ocrd/ocrd/workspace_backup.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a7ec9b228..5620ae25e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ Versioned according to [Semantic Versioning](http://semver.org/). Changed: * workspace bagger will create files with extension - * `save_mets` is atomic now, #278 + * `save_mets` is atomic now, #278, #285 ## [1.0.0b15] - 2019-08-14 diff --git a/ocrd/ocrd/workspace_backup.py b/ocrd/ocrd/workspace_backup.py index a83e198765..57030abc94 100644 --- a/ocrd/ocrd/workspace_backup.py +++ b/ocrd/ocrd/workspace_backup.py @@ -10,6 +10,8 @@ from .constants import BACKUP_DIR +from atomicwrites import atomic_write + def _chksum(s): return hashlib.sha256(s).hexdigest() @@ -83,8 +85,8 @@ def add(self): mets_file = join(d, 'mets.xml') log.info("Backing up to %s" % mets_file) makedirs(d) - with open(mets_file, 'wb') as f: - f.write(mets_str) + with atomic_write(mets_file, overwrite=True) as f: + f.write(mets_str.decode('utf-8')) return chksum def list(self):