-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathbatch_score.py
executable file
·140 lines (124 loc) · 3.63 KB
/
batch_score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python3
import argparse
import os
import sys
import traceback
from lib import core, utilities, run
from lib.attributes import Attributes
from lib.database import Database
def process_arguments():
    """
    Uses the argparse module to parse commandline arguments.

    When the script is invoked without any argument, the usage help is
    printed and the process exits with status 1.

    Returns:
        argparse.Namespace whose attributes are the parsed commandline
        arguments: cleanup, config_file, manifest_file, repositories_root,
        repositories_sample, key_string, num_processes and goldenset.
        config_file, manifest_file and repositories_sample are file
        objects already opened for reading by argparse.

    Raises:
        SystemExit: If no argument is given, or if argparse rejects the
            commandline (for instance a required option is missing or a
            file cannot be opened).
    """
    parser = argparse.ArgumentParser(
        description='Calculate the scores of a set of repositories.'
    )
    parser.add_argument(
        '--cleanup',
        action='store_true',
        dest='cleanup',
        help='Delete cloned repositories from the disk when done.'
    )
    parser.add_argument(
        '-c',
        '--config',
        type=argparse.FileType('r'),
        default='config.json',
        dest='config_file',
        help='Path to the configuration file.'
    )
    parser.add_argument(
        '-m',
        '--manifest',
        type=argparse.FileType('r'),
        default='manifest.json',
        dest='manifest_file',
        help='Path to the manifest file.'
    )
    # The two options below have no sensible default and main() uses both
    # values unconditionally, so a missing one is rejected here with a
    # usage error instead of surfacing later as a TypeError on None.
    parser.add_argument(
        '-r',
        '--repositories-root',
        dest='repositories_root',
        required=True,
        help='Path to the root of downloaded repositories.'
    )
    parser.add_argument(
        '-s',
        '--repositories-sample',
        type=argparse.FileType('r'),
        dest='repositories_sample',
        required=True,
        help='A file containing newline-separated GHTorrent project ids'
    )
    parser.add_argument(
        '-k',
        '--key-string',
        type=str,
        dest='key_string',
        default=None,
        required=False,
        help='String of attribute initials. Uppercase to persist data'
    )
    parser.add_argument(
        '-n',
        '--num-processes',
        type=int,
        dest='num_processes',
        default=1,
        required=False,
        help=(
            'Number of processes to spawn when processing repositories'
            ' from the samples file.'
        )
    )
    parser.add_argument(
        '--goldenset',
        action='store_true',
        dest='goldenset',
        help=(
            'Indicate that the repositories sample file contains projects'
            ' from the Golden Set.'
        )
    )

    # A bare invocation gets the full help text rather than a terse error.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()
def main():
    """
    Main execution flow.

    Parses the commandline, reads the configuration and manifest files,
    builds the database, attributes and run objects, makes sure the
    repositories root directory exists, and finally hands the project ids
    read from the repositories sample file to the run together with the
    name of the target table ('reaper_goldenset' when --goldenset is
    given, 'reaper_results' otherwise).

    Raises:
        SystemExit: With status 1 after printing the traceback when any
            step raises an exception, so that a failed run is visible to
            the calling shell. A SystemExit raised by the argument parser
            propagates unchanged.
    """
    try:
        args = process_arguments()
        config = utilities.read(args.config_file)
        manifest = utilities.read(args.manifest_file)

        # TODO: Refactor
        core.config = config
        utilities.TOKENIZER = core.Tokenizer()

        options = config['options']
        database = Database(options['datasource'])
        globaloptions = {
            'today': options['today'],
            'timeout': options['timeout']
        }
        attributes = Attributes(
            manifest['attributes'], database, args.cleanup, args.key_string,
            **globaloptions
        )

        # exist_ok=True already tolerates an existing directory, so no
        # separate existence check is needed beforehand.
        os.makedirs(args.repositories_root, exist_ok=True)

        table = 'reaper_goldenset' if args.goldenset else 'reaper_results'

        _run = run.Run(
            args.repositories_root, attributes, database,
            options['threshold'], args.num_processes
        )

        # Blank lines (typically a trailing one) are skipped because
        # int() would raise ValueError on them; the handle opened by
        # argparse is closed as soon as the ids have been read.
        with args.repositories_sample as sample:
            project_ids = [int(line) for line in sample if line.strip()]

        _run.run(project_ids, table)
    except Exception:
        # Top-level boundary: report the failure and exit non-zero
        # instead of returning normally with status 0.
        traceback.print_exc()
        sys.exit(1)
if __name__ == '__main__':
    # Script entry point: run the main flow only when this file is
    # executed directly, not when it is imported.
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C ends the run with a short notice instead of a traceback.
        # The leading carriage return moves the cursor back to the start
        # of the line before the message is written.
        print('\rCaught interrupt, killing all children...')