Skip to content

Commit

Permalink
mirror dir structure in sampling
Browse files (browse the repository at this point in the history)
  • Loading branch information
kjappelbaum committed Aug 14, 2024
1 parent 233e275 commit 79bc334
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/chemnlp/data/sampler_cli.py
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,9 @@ def process_dataset(
else:
templates = [t for t in templates if "<EOI>" not in t]

output_dir = os.path.join(output_dir, os.path.dirname(data_dir))
os.makedirs(output_dir, exist_ok=True)

for chunk_idx, df_chunk in enumerate(
pd.read_csv(data_path, chunksize=chunksize, low_memory=False)
):
Expand Down

0 comments on commit 79bc334

Please sign in to comment.