@@ -249,17 +249,17 @@ def detect_dataset(dataset_path, task='apc', load_aug=False):
249
249
dataset_file ['valid' ] += find_files (search_path , [d , 'dev' , task ], exclude_key = ['.inference' , 'train.' , 'test.' ] + filter_key_words + ['.ignore' ])
250
250
251
251
else :
252
- print ('Try to load {} dataset from local' .format (dataset_path ))
252
+ print ('Try to load {} dataset from local disk ' .format (dataset_path ))
253
253
if load_aug :
254
254
dataset_file ['train' ] += find_files (d , ['train' , task ], exclude_key = ['.inference' , 'test.' , 'valid.' ] + filter_key_words )
255
255
dataset_file ['test' ] += find_files (d , ['test' , task ], exclude_key = ['.inference' , 'train.' , 'valid.' ] + filter_key_words )
256
256
dataset_file ['valid' ] += find_files (d , ['valid' , task ], exclude_key = ['.inference' , 'train.' ] + filter_key_words )
257
257
dataset_file ['valid' ] += find_files (d , ['dev' , task ], exclude_key = ['.inference' , 'train.' ] + filter_key_words )
258
258
else :
259
259
dataset_file ['train' ] += find_cwd_files ([d , 'train' , task ], exclude_key = ['.inference' , 'test.' , 'valid.' ] + filter_key_words + ['.ignore' ])
260
- dataset_file ['test' ] += find_cwd_files ([d , 'train ' , task ], exclude_key = ['.inference' , 'train.' , 'valid.' ] + filter_key_words + ['.ignore' ])
261
- dataset_file ['valid' ] += find_cwd_files ([d , 'train ' , task ], exclude_key = ['.inference' , 'train.' , 'test.' ] + filter_key_words + ['.ignore' ])
262
- dataset_file ['valid' ] += find_cwd_files ([d , 'train ' , task ], exclude_key = ['.inference' , 'train.' , 'test.' ] + filter_key_words + ['.ignore' ])
260
+ dataset_file ['test' ] += find_cwd_files ([d , 'test' , task ], exclude_key = ['.inference' , 'train.' , 'valid.' ] + filter_key_words + ['.ignore' ])
261
+ dataset_file ['valid' ] += find_cwd_files ([d , 'valid' , task ], exclude_key = ['.inference' , 'train.' , 'test.' ] + filter_key_words + ['.ignore' ])
262
+ dataset_file ['valid' ] += find_cwd_files ([d , 'dev' , task ], exclude_key = ['.inference' , 'train.' , 'test.' ] + filter_key_words + ['.ignore' ])
263
263
264
264
# # if we need train a checkpoint using as much data as possible, we can merge train, valid and test set as training sets
265
265
# dataset_file['train'] = dataset_file['train'] + dataset_file['test'] + dataset_file['valid']
0 commit comments