Skip to content

Commit c5a5211

Browse files
committed
add test
1 parent 8b42f00 commit c5a5211

File tree

4 files changed

+93
-40
lines changed

4 files changed

+93
-40
lines changed

.github/workflows/publish.yml

+14
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ env:
88
REGISTRY: ghcr.io
99
IMAGE_NAME: ${{ github.repository }}
1010

11+
defaults:
12+
run:
13+
# GitHub Actions run without a TTY device. This is a workaround to get one,
14+
# based on https://github.com/actions/runner/issues/241#issuecomment-2019042651
15+
shell: 'script --return --quiet --log-out /dev/null --command "bash -e {0}"'
16+
1117
jobs:
1218
build-and-push-image:
1319
runs-on: ubuntu-latest
@@ -20,6 +26,14 @@ jobs:
2026
- name: Checkout repository
2127
uses: actions/checkout@v4
2228

29+
- name: Run tests
30+
run: |
31+
docker run \
32+
-v ./src:/app \
33+
-v ./pdf:/app/pdf \
34+
$(docker build -q ./src) \
35+
bash src/test/example.sh
36+
2337
- name: Log in to the Container registry
2438
uses: docker/[email protected]
2539
with:

src/main.py

+65-39
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from pathlib import Path
99

1010
import pymupdf
11-
from joblib import Parallel, delayed
1211
from natsort import natsorted, ns
1312
from PIL import Image
1413

@@ -34,7 +33,7 @@ def predict(base: Path, input_file: Path, args: list[str]) -> None:
3433
[
3534
"bash",
3635
"-c",
37-
f"ocrmypdf --jobs 1 {' '.join(args)} {input_file} {output_file}",
36+
f"ocrmypdf {' '.join(args)} {input_file} {output_file}",
3837
],
3938
check=True,
4039
)
@@ -45,50 +44,77 @@ def predict(base: Path, input_file: Path, args: list[str]) -> None:
4544
pass
4645

4746

47+
def cleanup(root: str, files: list[str]) -> None:
    """
    Removes the directory when it holds no files

    Removal is best-effort: a directory that cannot be removed is left
    in place and no error is reported to the caller.

    Args:
        root (str): The directory to remove
        files (list[str]): The names of the files found directly in ``root``
    """
    if files:
        return

    try:
        os.rmdir(root)
    except OSError:
        # os.rmdir refuses directories that still contain sub-directories,
        # that are already gone, or that we may not delete. All of these are
        # acceptable here, so only filesystem errors are ignored; programming
        # errors (e.g. a wrong argument type) still surface.
        pass
60+
61+
62+
def merge(base: Path, root: str, files: list[str]) -> None:
    """
    Merges the PDFs of a directory into a single PDF next to it

    The PDFs are appended in natural, case-insensitive order and the result
    is written to ``<root>.pdf``. The top-level ``done`` directory itself is
    never merged, and directories without PDFs are left untouched.

    Args:
        base (Path): The base directory
        root (str): The directory whose PDFs are merged
        files (list[str]): The names of the files found directly in ``root``
    """
    proot = Path(root)
    if proot == base / "done":
        return

    pdf_paths = [
        str(proot / file) for file in files if file.lower().endswith(".pdf")
    ]
    if not pdf_paths:
        return

    merged = pymupdf.open()
    try:
        # Sort the paths before opening anything so that only one source
        # document is open at a time and none is left open on failure.
        for pdf_path in natsorted(pdf_paths, alg=ns.IGNORECASE):
            pdf = pymupdf.open(pdf_path)
            try:
                merged.insert_pdf(pdf)
            finally:
                pdf.close()

        # Nothing is written unless every source was inserted successfully.
        merged.save(Path(root + ".pdf"), garbage=4, deflate=True)
    finally:
        merged.close()
90+
91+
4892
if __name__ == "__main__":
    # First CLI argument: working directory (defaults to the current one).
    pdfs = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
    todo_dir = pdfs / "todo"
    done_dir = pdfs / "done"
    for directory in (pdfs, todo_dir, done_dir):
        directory.mkdir(exist_ok=True, parents=True)

    # Remaining CLI arguments are handed to predict; fall back to the
    # default option set when none are given.
    if len(sys.argv) > 2:
        ocr_args = sys.argv[2:]
    else:
        ocr_args = [
            "--rotate-pages",
            "--deskew",
            "--skip-text",
            "--invalidate-digital-signatures",
            "--clean",
        ]

    # Process every file below "todo", one at a time.
    for folder, _, names in os.walk(todo_dir):
        for name in names:
            predict(pdfs, Path(folder) / name, ocr_args)

    # Drop directories below "todo" that no longer hold any file.
    for folder, _, names in os.walk(todo_dir):
        cleanup(folder, names)

    # Combine the PDFs of each sub-directory of "done" into a single PDF.
    for folder, _, names in os.walk(done_dir):
        merge(pdfs, folder, names)

src/predict.sh

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ if ! apt_install "$langs"; then
1313
fi
1414

1515
[ -d venv ] || python3 -m venv venv
16-
export OMP_THREAD_LIMIT=1
1716

1817
if [[ -e venv/bin/pip3 ]]; then
1918
source venv/bin/activate

src/test/example.sh

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash
# Black-box test: runs src/predict.sh against a single example PDF and checks
# that the file leaves pdf/todo and shows up in pdf/done.

set -e

# Reports the failed check on stderr and aborts with a non-zero status.
fail() {
  echo "FAILED: $*" >&2
  exit 1
}

black_box_single_pdf() {
  # Restore the fixture from its backup; the backslash bypasses any cp alias.
  \cp -f pdf/todo/example.pdf.bak pdf/todo/example.pdf
  bash src/predict.sh pdf
  [ ! -f pdf/todo/example.pdf ] || fail "pdf/todo/example.pdf is still present"
  [ -f pdf/done/example.pdf ] || fail "pdf/done/example.pdf is missing"
  # Remove the output so the test can be run again from a clean state.
  rm -f pdf/done/example.pdf
}

black_box_single_pdf
echo "All tests passed!"

0 commit comments

Comments
 (0)