@@ -104,7 +104,7 @@ def parse_path(path: Union[str, 'PathLike'],
104
104
return {'image' : path , 'lines' : [{'text' : gt }]}
105
105
106
106
107
- def build_binary_dataset (files : Optional [List [Union [str , 'PathLike' , Dict ]]] = None ,
107
+ def build_binary_dataset (files : Optional [List [Union [str , 'PathLike' , 'Segmentation' ]]] = None ,
108
108
output_file : Union [str , 'PathLike' ] = None ,
109
109
format_type : str = 'xml' ,
110
110
num_workers : int = 0 ,
@@ -120,7 +120,7 @@ def build_binary_dataset(files: Optional[List[Union[str, 'PathLike', Dict]]] = N
120
120
binary dataset.
121
121
122
122
Args:
123
- files: List of XML input files.
123
+ files: List of XML input files or Segmentation container objects.
124
124
output_file: Path to the output file.
125
125
format_type: One of `xml`, `alto`, `page`, `path`, or None. In `None`
126
126
mode, the files argument is expected to be a list of
@@ -191,9 +191,9 @@ def build_binary_dataset(files: Optional[List[Union[str, 'PathLike', Dict]]] = N
191
191
alphabet = Counter ()
192
192
num_lines = 0
193
193
for doc in docs :
194
- if format_type in ['xml' , 'alto' , 'page' ]:
194
+ if format_type in ['xml' , 'alto' , 'page' , None ]:
195
195
lines = doc .lines .values ()
196
- else :
196
+ elif format_type == 'path' :
197
197
lines = doc ['lines' ]
198
198
for line in lines :
199
199
num_lines += 1
0 commit comments