Skip to content

Commit

Permalink
Merge pull request #21 from slub/layouts
Browse files (browse the repository at this point in the history)
Add Layout handling
  • Loading branch information
rue-a authored Apr 26, 2024
2 parents 9d14212 + c716c6c commit 55fe416
Show file tree
Hide file tree
Showing 15 changed files with 234,707 additions and 2,038 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ dist
*~
.vscode
ocrd.log
ocr_test
tests/workspace/OCR-D-IMG
tests/workspace/OCR-D-SEG-PAGE
3 changes: 2 additions & 1 deletion tests/test_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@ def setUp(self):
{
"aws": Path("textract_responses")
/ f"{filename.name.split('.', 1)[0]}.json",
"img": Path("images") / filename,
"img": Path("images") / filename.name,
"xml": Path("reference_page_xml")
/ f"{filename.name.split('.', 1)[0]}.xml",
}
for filename in (workspace / "images").iterdir()
]
print(self.test_path_dict)

def test_api(self):
for path in self.test_path_dict:
Expand Down
Binary file added tests/workspace/images/nd1969-01-21_3.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/workspace/images/nowa_doba.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
42 changes: 32 additions & 10 deletions tests/workspace/mets.xml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:lc/xmlns/premis-v2 http://www.loc.gov/standards/premis/v2/premis-v2-0.xsd http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd http://www.loc.gov/mix/v10 http://www.loc.gov/standards/mix/mix10/mix10.xsd">
<mets:metsHdr CREATEDATE="2023-06-21T13:50:00.807482">
<mets:metsHdr CREATEDATE="2024-04-24T11:11:46.027506">
<mets:agent TYPE="OTHER" OTHERTYPE="SOFTWARE" ROLE="CREATOR">
<mets:name>ocrd/core v2.51.0</mets:name>
<mets:name>ocrd/core v2.63.3</mets:name>
</mets:agent>
</mets:metsHdr>
<mets:dmdSec ID="DMDLOG_0001">
Expand All @@ -18,30 +18,42 @@
<mets:fileSec>
<mets:fileGrp USE="OCR-D-IMG">
<mets:file ID="OCR-D-IMG_f18xx-Missio-EMU-0042" MIMETYPE="image/jpeg">
<mets:FLocat LOCTYPE="OTHER" OTHERLOCTYPE="FILE" xlink:href="images/18xx-Missio-EMU-0042.jpg"/>
<mets:FLocat xlink:href="images/18xx-Missio-EMU-0042.jpg" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-IMG_Ansiedlung_Korotschin_UZS_Sign_22a_0018" MIMETYPE="image/tiff">
<mets:FLocat LOCTYPE="OTHER" OTHERLOCTYPE="FILE" xlink:href="images/Ansiedlung_Korotschin_UZS_Sign_22a_0018.tif"/>
<mets:FLocat xlink:href="images/Ansiedlung_Korotschin_UZS_Sign_22a_0018.tif" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-IMG_Ansiedlung_WD_Wielun_Lentschütz_0053" MIMETYPE="image/tiff">
<mets:FLocat LOCTYPE="OTHER" OTHERLOCTYPE="FILE" xlink:href="images/Ansiedlung_WD_Wielun_Lentschütz_0053.tif"/>
<mets:FLocat xlink:href="images/Ansiedlung_WD_Wielun_Lentschütz_0053.tif" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-IMG_Lodz_UZS_25_0056" MIMETYPE="image/tiff">
<mets:FLocat LOCTYPE="OTHER" OTHERLOCTYPE="FILE" xlink:href="images/Lodz_UZS_25_0056.tif"/>
<mets:FLocat xlink:href="images/Lodz_UZS_25_0056.tif" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-IMG_nd1969-01-21_3" MIMETYPE="image/jpeg">
<mets:FLocat xlink:href="images/nd1969-01-21_3.jpg" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-IMG_nowa_doba" MIMETYPE="image/jpeg">
<mets:FLocat xlink:href="images/nowa_doba.jpg" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
</mets:fileGrp>
<mets:fileGrp USE="OCR-D-SEG-PAGE">
<mets:file ID="OCR-D-SEG-PAGE_f18xx-Missio-EMU-0042" MIMETYPE="application/vnd.prima.page+xml">
<mets:FLocat LOCTYPE="OTHER" OTHERLOCTYPE="FILE" xlink:href="page/18xx-Missio-EMU-0042.xml"/>
<mets:FLocat xlink:href="reference_page_xml/18xx-Missio-EMU-0042.xml" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-SEG-PAGE_Ansiedlung_Korotschin_UZS_Sign_22a_0018" MIMETYPE="application/vnd.prima.page+xml">
<mets:FLocat LOCTYPE="OTHER" OTHERLOCTYPE="FILE" xlink:href="page/Ansiedlung_Korotschin_UZS_Sign_22a_0018.xml"/>
<mets:FLocat xlink:href="reference_page_xml/Ansiedlung_Korotschin_UZS_Sign_22a_0018.xml" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-SEG-PAGE_Ansiedlung_WD_Wielun_Lentschütz_0053" MIMETYPE="application/vnd.prima.page+xml">
<mets:FLocat LOCTYPE="OTHER" OTHERLOCTYPE="FILE" xlink:href="page/Ansiedlung_WD_Wielun_Lentschütz_0053.xml"/>
<mets:FLocat xlink:href="reference_page_xml/Ansiedlung_WD_Wielun_Lentschütz_0053.xml" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-SEG-PAGE_Lodz_UZS_25_0056" MIMETYPE="application/vnd.prima.page+xml">
<mets:FLocat LOCTYPE="OTHER" OTHERLOCTYPE="FILE" xlink:href="page/Lodz_UZS_25_0056.xml"/>
<mets:FLocat xlink:href="reference_page_xml/Lodz_UZS_25_0056.xml" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-SEG-PAGE_nd1969-01-21_03" MIMETYPE="application/vnd.prima.page+xml">
<mets:FLocat xlink:href="reference_page_xml/nd1969-01-21_03.xml" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
<mets:file ID="OCR-D-SEG-PAGE_nowa_doba" MIMETYPE="application/vnd.prima.page+xml">
<mets:FLocat xlink:href="reference_page_xml/nowa_doba.xml" LOCTYPE="OTHER" OTHERLOCTYPE="FILE"/>
</mets:file>
</mets:fileGrp>
</mets:fileSec>
Expand All @@ -63,6 +75,16 @@
<mets:fptr FILEID="OCR-D-IMG_Lodz_UZS_25_0056"/>
<mets:fptr FILEID="OCR-D-SEG-PAGE_Lodz_UZS_25_0056"/>
</mets:div>
<!-- Physical page nd1969-01-21_3: the scan and its reference PAGE-XML are grouped in a
     single page div so that processors resolving files by page ID find both.
     The PAGE-XML file keeps its existing file ID (suffix _03) as declared in the fileSec.
     NOTE(review): assumes reference_page_xml/nd1969-01-21_03.xml is the ground truth for
     images/nd1969-01-21_3.jpg (names differ only in zero padding); confirm against the files. -->
<mets:div TYPE="page" ID="nd1969-01-21_3">
<mets:fptr FILEID="OCR-D-IMG_nd1969-01-21_3"/>
<mets:fptr FILEID="OCR-D-SEG-PAGE_nd1969-01-21_03"/>
</mets:div>
<!-- Physical page nowa_doba: image plus its reference PAGE-XML. -->
<mets:div TYPE="page" ID="nowa_doba">
<mets:fptr FILEID="OCR-D-IMG_nowa_doba"/>
<mets:fptr FILEID="OCR-D-SEG-PAGE_nowa_doba"/>
</mets:div>
</mets:div>
</mets:structMap>
</mets:mets>
Loading

0 comments on commit 55fe416

Please sign in to comment.