-
Notifications
You must be signed in to change notification settings - Fork 0
/
sig_to_csv.py
35 lines (28 loc) · 976 Bytes
/
sig_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env python3
"""
Given a sourmash signature, extract the minhash integers and abund info
to a csv file with the basename of the sig file name as the name of the
abund column.
"""
from sourmash import signature
import pandas as pd
import os
import sys
import argparse
def main():
p = argparse.ArgumentParser()
p.add_argument('signature') # sourmash signature
p.add_argument('output') # output csv file name
args = p.parse_args()
# load the signature from disk
sigfp = open(args.signature, 'rt')
siglist = list(signature.load_signatures(sigfp))
loaded_sig = siglist[0]
# Get the minhashes
mins = loaded_sig.minhash.get_mins(with_abundance = True)
name = os.path.basename(args.signature)
df = pd.DataFrame.from_dict(mins, orient = 'index', columns=[name], dtype=object)
# write to a csv
df.to_csv(args.output, index_label= "minhash")
if __name__ == '__main__':
sys.exit(main())