import json
import logging as log
import os
import sys
import dateparser
from time import time as ts
import yaml
from szz.ag_szz import AGSZZ
from szz.b_szz import BaseSZZ
from szz.l_szz import LSZZ
from szz.ma_szz import MASZZ, DetectLineMoved
from szz.r_szz import RSZZ
from szz.ra_szz import RASZZ
log.basicConfig(level=log.INFO, format='%(asctime)s :: %(levelname)s :: %(message)s')
log.getLogger('pydriller').setLevel(log.WARNING)
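
# The input JSON is expected to be a list of bug-fix records. The keys actually read below are
# 'repo_name', 'fix_commit_hash' and, when issue_date_filter is enabled in the configuration,
# 'earliest_issue_date' / 'best_scenario_issue_date'. A minimal illustrative entry (all values
# here are made-up placeholders):
#
# [
#   {
#     "repo_name": "owner/project",
#     "fix_commit_hash": "d1f2e3...",
#     "earliest_issue_date": "2020-01-01T00:00:00",
#     "best_scenario_issue_date": "2020-01-01T00:00:00"
#   }
# ]
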
def main(input_json: str, out_json: str, conf: dict, repos_dir: str):
    with open(input_json, 'r') as in_file:
        bugfix_commits = json.loads(in_file.read())

    tot = len(bugfix_commits)
    for i, commit in enumerate(bugfix_commits):
        bug_introducing_commits = set()
        repo_name = commit['repo_name']
        repo_url = f'https://test:[email protected]/{repo_name}.git'  # using test:test as git login to skip private repos during clone
        fix_commit = commit['fix_commit_hash']
        log.info(f'{i + 1} of {tot}: {repo_name} {fix_commit}')

        # when the issue date filter is enabled, convert the issue date string to a unix timestamp for find_bic
        commit_issue_date = None
        if conf.get('issue_date_filter', None):
            commit_issue_date = (commit.get('earliest_issue_date', None) or commit.get('best_scenario_issue_date', None))
            commit_issue_date = dateparser.parse(commit_issue_date).timestamp()

        # select and run the configured SZZ variant on the current fix commit
        szz_name = conf['szz_name']
        if szz_name == 'b':
            b_szz = BaseSZZ(repo_full_name=repo_name, repo_url=repo_url, repos_dir=repos_dir)
            imp_files = b_szz.get_impacted_files(fix_commit_hash=fix_commit,
                                                 file_ext_to_parse=conf.get('file_ext_to_parse'),
                                                 only_deleted_lines=conf.get('only_deleted_lines', True))
            bug_introducing_commits = b_szz.find_bic(fix_commit_hash=fix_commit,
                                                     impacted_files=imp_files,
                                                     ignore_revs_file_path=conf.get('ignore_revs_file_path'),
                                                     issue_date_filter=conf.get('issue_date_filter'),
                                                     issue_date=commit_issue_date)
        elif szz_name == 'ag':
            ag_szz = AGSZZ(repo_full_name=repo_name, repo_url=repo_url, repos_dir=repos_dir)
            imp_files = ag_szz.get_impacted_files(fix_commit_hash=fix_commit,
                                                  file_ext_to_parse=conf.get('file_ext_to_parse'),
                                                  only_deleted_lines=conf.get('only_deleted_lines', True))
            bug_introducing_commits = ag_szz.find_bic(fix_commit_hash=fix_commit,
                                                      impacted_files=imp_files,
                                                      ignore_revs_file_path=conf.get('ignore_revs_file_path'),
                                                      max_change_size=conf.get('max_change_size'),
                                                      issue_date_filter=conf.get('issue_date_filter'),
                                                      issue_date=commit_issue_date)
        elif szz_name == 'ma':
            ma_szz = MASZZ(repo_full_name=repo_name, repo_url=repo_url, repos_dir=repos_dir)
            imp_files = ma_szz.get_impacted_files(fix_commit_hash=fix_commit,
                                                  file_ext_to_parse=conf.get('file_ext_to_parse'),
                                                  only_deleted_lines=conf.get('only_deleted_lines', True))
            bug_introducing_commits = ma_szz.find_bic(fix_commit_hash=fix_commit,
                                                      impacted_files=imp_files,
                                                      ignore_revs_file_path=conf.get('ignore_revs_file_path'),
                                                      max_change_size=conf.get('max_change_size'),
                                                      detect_move_from_other_files=DetectLineMoved(conf.get('detect_move_from_other_files')),
                                                      issue_date_filter=conf.get('issue_date_filter'),
                                                      issue_date=commit_issue_date)
        elif szz_name == 'r':
            r_szz = RSZZ(repo_full_name=repo_name, repo_url=repo_url, repos_dir=repos_dir)
            imp_files = r_szz.get_impacted_files(fix_commit_hash=fix_commit,
                                                 file_ext_to_parse=conf.get('file_ext_to_parse'),
                                                 only_deleted_lines=conf.get('only_deleted_lines', True))
            bug_introducing_commits = r_szz.find_bic(fix_commit_hash=fix_commit,
                                                     impacted_files=imp_files,
                                                     ignore_revs_file_path=conf.get('ignore_revs_file_path'),
                                                     max_change_size=conf.get('max_change_size'),
                                                     detect_move_from_other_files=DetectLineMoved(conf.get('detect_move_from_other_files')),
                                                     issue_date_filter=conf.get('issue_date_filter'),
                                                     issue_date=commit_issue_date)
        elif szz_name == 'l':
            l_szz = LSZZ(repo_full_name=repo_name, repo_url=repo_url, repos_dir=repos_dir)
            imp_files = l_szz.get_impacted_files(fix_commit_hash=fix_commit,
                                                 file_ext_to_parse=conf.get('file_ext_to_parse'),
                                                 only_deleted_lines=conf.get('only_deleted_lines', True))
            bug_introducing_commits = l_szz.find_bic(fix_commit_hash=fix_commit,
                                                     impacted_files=imp_files,
                                                     ignore_revs_file_path=conf.get('ignore_revs_file_path'),
                                                     max_change_size=conf.get('max_change_size'),
                                                     detect_move_from_other_files=DetectLineMoved(conf.get('detect_move_from_other_files')),
                                                     issue_date_filter=conf.get('issue_date_filter'),
                                                     issue_date=commit_issue_date)
        elif szz_name == 'ra':
            ra_szz = RASZZ(repo_full_name=repo_name, repo_url=repo_url, repos_dir=repos_dir)
            imp_files = ra_szz.get_impacted_files(fix_commit_hash=fix_commit,
                                                  file_ext_to_parse=conf.get('file_ext_to_parse'),
                                                  only_deleted_lines=conf.get('only_deleted_lines', True))
            bug_introducing_commits = ra_szz.find_bic(fix_commit_hash=fix_commit,
                                                      impacted_files=imp_files,
                                                      ignore_revs_file_path=conf.get('ignore_revs_file_path'),
                                                      max_change_size=conf.get('max_change_size'),
                                                      detect_move_from_other_files=DetectLineMoved(conf.get('detect_move_from_other_files')),
                                                      issue_date_filter=conf.get('issue_date_filter'),
                                                      issue_date=commit_issue_date)
        else:
            log.error(f'SZZ implementation not found: {szz_name}')
            exit(-3)

        log.info(f"result: {bug_introducing_commits}")
        bugfix_commits[i]["inducing_commit_hash"] = [bic.hexsha for bic in bug_introducing_commits if bic]

    with open(out_json, 'w') as out:
        json.dump(bugfix_commits, out)

    log.info("+++ DONE +++")
if __name__ == "__main__":
    # '--help' or too few arguments: print usage and exit
    if (len(sys.argv) > 1 and '--help' in sys.argv[1]) or len(sys.argv) < 3:
        print('USAGE: python main.py <bugfix_commits.json> <conf_file path> <repos_directory(optional)>')
        print('If repos_directory is not set, pyszz will download each repository')
        exit(-1)

    input_json = sys.argv[1]
    conf_file = sys.argv[2]
    repos_dir = sys.argv[3] if len(sys.argv) > 3 else None

    if not os.path.isfile(input_json):
        log.error('invalid input json')
        exit(-2)
    if not os.path.isfile(conf_file):
        log.error('invalid conf file')
        exit(-2)

    with open(conf_file, 'r') as f:
        conf = yaml.safe_load(f)
    log.info(f"parsed conf yml: {conf}")
    # validate the SZZ name before using it to build the output file name
    szz_name = conf.get('szz_name')
    if not szz_name:
        log.error('The configuration file does not define the SZZ name. Please, fix.')
        exit(-3)

    out_dir = 'out'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    out_json = os.path.join(out_dir, f'bic_{szz_name}_{int(ts())}.json')

    log.info(f'Launching {szz_name}-szz')
    main(input_json, out_json, conf, repos_dir)