-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathconfidence.py
executable file
·34 lines (28 loc) · 1 KB
/
confidence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/env python3
"""
Add confidence column to feather file

Confidence values (the first matching rule wins):
type - is_type is TRUE (may also be published or refseq)
published - is_published is TRUE and is_type is FALSE
refseq - is_refseq is TRUE, is_type and is_published are FALSE
direct - none of the above (the rest)
"""
import argparse
import pandas
def main():
    """Rewrite a feather file in place with a fresh ``confidence`` column.

    The path of the feather file is taken from the single positional
    command-line argument.  Any existing ``confidence`` column is discarded
    and every row is labelled with the first rule that matches:

    * ``type`` - ``is_type`` is true
    * ``published`` - ``is_published`` is true
    * ``refseq`` - ``is_refseq`` is true
    * ``direct`` - none of the above

    The frame is then written back to the same path, with ``confidence`` as
    the last column.

    Raises:
        KeyError: if the file lacks one of the ``is_type``,
            ``is_published`` or ``is_refseq`` columns.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('feather')
    args = parser.parse_args()

    info = pandas.read_feather(args.feather)

    # Remove a stale column first so the new one is always appended last.
    info = info.drop(columns='confidence', errors='ignore')

    # Build the labels as a standalone Series instead of creating the column
    # through ``info.loc[mask, 'confidence'] = ...``: that form raises
    # ValueError on a frame with zero rows.  Every row starts at the lowest
    # level and the flags are applied in ascending priority, so the
    # highest-priority matching flag is the one that sticks.
    confidence = pandas.Series('direct', index=info.index)
    for flag, label in (
            ('is_refseq', 'refseq'),
            ('is_published', 'published'),
            ('is_type', 'type')):
        confidence.loc[info[flag]] = label
    info['confidence'] = confidence

    info.to_feather(args.feather)
# Run the command-line entry point only when this file is executed as a
# script, so importing the module has no side effects.
if __name__ == '__main__':
    main()