-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathselect_files.py
executable file
·85 lines (67 loc) · 2.43 KB
/
select_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python3
import argparse
import os
import random
import shutil
from pathlib import Path
def get_args(argv: 'list[str] | None' = None) -> argparse.Namespace:
    """Parse the command-line arguments of the script.

    Args:
        argv (list[str] | None): argument strings to parse. With the default
            ``None`` argparse reads ``sys.argv[1:]``, so calling
            ``get_args()`` with no arguments behaves as it always did.

    Returns:
        argparse.Namespace: Namespace with the attributes ``srcdir``,
            ``outdir``, ``n`` (default 10000) and ``m`` (default 1).
    """
    parser = argparse.ArgumentParser(
        description=('Select n files, m from each subdirectory of the '
                     'source directory, and put them in the target '
                     'directory.'))
    parser.add_argument('srcdir', type=str, help='Source dir with files')
    parser.add_argument('outdir', type=str, help='Output dir for files')
    parser.add_argument('-n', type=int, default=10000, help='Number of files')
    parser.add_argument('-m',
                        type=int,
                        default=1,
                        help='Number of files from each subdirectory')
    return parser.parse_args(argv)
def make_dirs(source_dir: str, output_dir: str) -> tuple[Path, Path]:
    """Return Path objects for the source and output directories.

    The output directory is created if it does not exist yet; the source
    directory is only wrapped in a Path and is not checked or created.

    Args:
        source_dir (str): source directory name.
        output_dir (str): output directory name.

    Returns:
        tuple[Path, Path]: (source path, output path).

    Raises:
        FileExistsError: if output_dir exists but is not a directory.
    """
    source_path = Path(source_dir)
    output_path = Path(output_dir)
    # parents=True lets a nested output path such as "out/a/b" be created in
    # one go instead of failing when the intermediate directories are missing.
    output_path.mkdir(parents=True, exist_ok=True)
    return source_path, output_path
def copy_random_files(source_path: Path,
                      output_path: Path,
                      max_num: int,
                      per_subdir: int = 1) -> int:
    """Copy randomly chosen files from the subdirectories of source_path.

    The tree is visited in passes. On every pass up to ``per_subdir`` files
    are taken at random from each directory below ``source_path`` (files
    lying directly in ``source_path`` are not candidates). Passes repeat
    until ``max_num`` files were copied or no candidates are left, so the
    function terminates even when fewer files exist than were requested.

    All files land flat in ``output_path``; a file whose name was already
    copied from another directory is skipped so it cannot overwrite the
    earlier copy.

    Args:
        source_path (Path): root directory whose subdirectories are sampled.
        output_path (Path): existing directory that receives the copies.
        max_num (int): upper bound on the number of files to copy.
        per_subdir (int): files taken from one subdirectory per pass.

    Returns:
        int: number of files actually copied.

    Raises:
        ValueError: if per_subdir is smaller than 1.
    """
    if per_subdir < 1:
        raise ValueError('Number of files from subdirectory must be >= 1')

    # The walk is materialised before anything is copied. Its first entry is
    # the source root itself, which is deliberately left out.
    pools = []
    for dir_path, _, names in list(os.walk(source_path))[1:]:
        if names:
            # Shuffling once and popping from the end is a uniform draw
            # without replacement, without an O(n) list.remove per pick.
            random.shuffle(names)
            pools.append((dir_path, names))

    copied_names = set()
    copied = 0
    while copied < max_num and pools:
        for dir_path, names in pools:
            taken = 0
            while names and taken < per_subdir and copied < max_num:
                name = names.pop()
                if name in copied_names:
                    continue
                shutil.copy2(Path(dir_path, name), output_path)
                copied_names.add(name)
                copied += 1
                taken += 1
            if copied >= max_num:
                break
        # Dropping exhausted directories guarantees the outer loop ends once
        # every candidate has been used.
        pools = [pool for pool in pools if pool[1]]
    return copied


def main() -> None:
    """Run the script: parse arguments, copy the files and zip the result."""
    args = get_args()
    source_path, output_path = make_dirs(args.srcdir, args.outdir)
    try:
        # NOTE(review): -m used to be parsed but ignored; it is treated here
        # as "files per subdirectory per pass", which leaves the default
        # (m=1) selection scheme unchanged - confirm this is the intent.
        copied = copy_random_files(source_path, output_path, args.n, args.m)
    except Exception as err:
        print(err)
    else:
        if copied < args.n:
            print(f'Only {copied} of {args.n} requested files were found.')
        # The archive is named after the source directory argument
        # ("<srcdir>.zip") and contains the content of the output directory.
        shutil.make_archive(args.srcdir, 'zip', output_path)
    finally:
        print('Script is stopped!')


if __name__ == '__main__':
    main()