-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-shrink-extract-regions.mk
85 lines (60 loc) · 2.06 KB
/
gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-shrink-extract-regions.mk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Install by copying (or symlinking) makefiles into a directory
# where all OCR-D workspaces (unpacked BagIts) reside. Then
# chdir to that location.
# Call via:
# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or
# `make -f WORKFLOW-CONFIG.mk all` or just
# `make -f WORKFLOW-CONFIG.mk`
# To rebuild partially, you must pass -W to recursive make:
# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"`
# To get help on available goals:
# `make help`
###
# From here on, custom configuration begins.
# Print a plain-text description of this workflow configuration.
# Declared phony so that a file or directory named "info" in the working
# directory can never make this goal look up to date.
.PHONY: info
info:
	@echo "Read GT line segmentation,"
	@echo "then binarize+denoise+deskew pages,"
	@echo "then clip regions,"
	@echo "then shrink regions into the hull polygon of its lines,"
	@echo "and finally extract page images and region coordinates"
	@echo "(including meta-data) into one directory,"
	@echo "with corresponding filename suffixes for segmentation training."
# Name of the input file group (the GT line segmentation).
INPUT = OCR-D-GT-SEG-LINE

# Fetch the input: download the files of the input file group itself ($@),
# and additionally those of OCR-D-IMG.
# Recipe lines must start with a hard TAB. The trailing "# just in case" sits
# inside the recipe, so it is passed to the shell rather than treated as a
# make comment.
$(INPUT):
	ocrd workspace find -G $@ --download
	ocrd workspace find -G OCR-D-IMG --download # just in case
# Page binarization stage (Olena, Sauvola).
# The file group name is derived from the input file group.
BIN = $(INPUT)-BINPAGE-sauvola
# Per-target settings: processor parameters, then the processor itself.
$(BIN): PARAMS = "impl": "sauvola-ms-split"
$(BIN): TOOL = ocrd-olena-binarize
# Built from the input file group.
$(BIN): $(INPUT)
# Page denoising stage (ocropy), applied to the binarized pages.
DEN = $(BIN)-DENOISE-ocropy
# Per-target settings: operate on page level, with noise_maxsize set to 3.0.
$(DEN): PARAMS = "level-of-operation": "page", "noise_maxsize": 3.0
$(DEN): TOOL = ocrd-cis-ocropy-denoise
# Built from the binarization stage.
$(DEN): $(BIN)
# First deskewing stage: Tesseract-based, on page level.
# NOTE(review): the variable name suggests this pass is meant to correct page
# orientation ("flip") before the ocropy deskewing that follows — confirm.
FLIP = $(DEN)-DESKEW-tesseract
# Per-target settings: processor parameters, then the processor itself.
$(FLIP): PARAMS = "operation_level": "page"
$(FLIP): TOOL = ocrd-tesserocr-deskew
# Built from the denoising stage.
$(FLIP): $(DEN)
# Second deskewing stage: ocropy-based, on page level, with maxskew set to 5.
DESK = $(FLIP)-DESKEW-ocropy
# Per-target settings: processor parameters, then the processor itself.
$(DESK): PARAMS = "level-of-operation": "page", "maxskew": 5
$(DESK): TOOL = ocrd-cis-ocropy-deskew
# Built from the Tesseract deskewing stage.
$(DESK): $(FLIP)
# Region clipping stage (ocropy), applied to the deskewed pages.
# No PARAMS are set for this target in this file.
CLIP = $(DESK)-CLIP
$(CLIP): TOOL = ocrd-cis-ocropy-clip
# Built from the ocropy deskewing stage.
$(CLIP): $(DESK)
# Segmentation stage run with ocrd-cis-ocropy-segment on the clipped result.
# Unlike the preceding stages, the file group name is fixed rather than
# derived from its predecessor.
RESEG = OCR-D-SEG-LINE
# Per-target settings: processor parameters, then the processor itself.
$(RESEG): PARAMS = "spread": 2.4
$(RESEG): TOOL = ocrd-cis-ocropy-segment
# Built from the clipping stage.
$(RESEG): $(CLIP)
# Region repair stage (ocrd-segment-repair with "sanitize" enabled).
# NOTE(review): this looks like the "shrink regions into the hull polygon of
# its lines" step announced by the info goal — confirm.
TIGHT = OCR-D-SEG-BLOCK
# Per-target settings: processor parameters, then the processor itself.
$(TIGHT): PARAMS = "sanitize": true
$(TIGHT): TOOL = ocrd-segment-repair
# Built from the segmentation stage.
$(TIGHT): $(RESEG)
# Final stage: region extraction (ocrd-segment-extract-regions) with
# "transparency" enabled.
OUTPUT = OCR-D-IMG-REGIONS
# Per-target settings: processor parameters, then the processor itself.
$(OUTPUT): PARAMS = "transparency": true
$(OUTPUT): TOOL = ocrd-segment-extract-regions
# Built from the region repair stage.
$(OUTPUT): $(TIGHT)

# When no goal is named on the command line, build the final file group.
.DEFAULT_GOAL = $(OUTPUT)
# Down here, custom configuration ends.
###
# Pull in the file named "Makefile" (see the installation notes at the top of
# this file).
# NOTE(review): it presumably provides the generic rules that consume the
# per-target TOOL and PARAMS settings above, as well as the "help" goal — confirm.
include Makefile