Skip to content

Commit d6c75d7

Browse files
committed
Make arrow ds compilation work with container classes
1 parent b4b0589 commit d6c75d7

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

kraken/lib/arrow_dataset.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ def parse_path(path: Union[str, 'PathLike'],
104104
return {'image': path, 'lines': [{'text': gt}]}
105105

106106

107-
def build_binary_dataset(files: Optional[List[Union[str, 'PathLike', Dict]]] = None,
107+
def build_binary_dataset(files: Optional[List[Union[str, 'PathLike', 'Segmentation']]] = None,
108108
output_file: Union[str, 'PathLike'] = None,
109109
format_type: str = 'xml',
110110
num_workers: int = 0,
@@ -120,7 +120,7 @@ def build_binary_dataset(files: Optional[List[Union[str, 'PathLike', Dict]]] = N
120120
binary dataset.
121121
122122
Args:
123-
files: List of XML input files.
123+
files: List of XML input files or Segmentation container objects.
124124
output_file: Path to the output file.
125125
format_type: One of `xml`, `alto`, `page`, `path`, or None. In `None`
126126
mode, the files argument is expected to be a list of
@@ -191,9 +191,9 @@ def build_binary_dataset(files: Optional[List[Union[str, 'PathLike', Dict]]] = N
191191
alphabet = Counter()
192192
num_lines = 0
193193
for doc in docs:
194-
if format_type in ['xml', 'alto', 'page']:
194+
if format_type in ['xml', 'alto', 'page', None]:
195195
lines = doc.lines.values()
196-
else:
196+
elif format_type == 'path':
197197
lines = doc['lines']
198198
for line in lines:
199199
num_lines += 1

0 commit comments

Comments
 (0)