-
Notifications
You must be signed in to change notification settings - Fork 31
/
iLearn-protein-basic.py
54 lines (52 loc) · 3.07 KB
/
iLearn-protein-basic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python
#_*_coding:utf-8_*_
import argparse
import re
from descproteins import *
from pubscripts import *
# Canonical 20-letter amino-acid alphabet. Used as the default output order
# and as the fallback whenever a user-defined order is not usable.
DEFAULT_AA_ORDER = 'ACDEFGHIKLMNPQRSTVWY'

# Predefined residue orderings selectable through --order.
PREDEFINED_AA_ORDERS = {
    'alphabetically': DEFAULT_AA_ORDER,
    'polarity': 'DENKRQHSGTAPYVMCWIFL',
    'sideChainVolume': 'GASDPCTNEVHQILMKRFYW',
}

# Descriptor names accepted by --method. main() looks each one up as
# "<name>.<name>", i.e. a module of that name exposing a function of that name.
ENCODING_METHODS = [
    'AAC', 'EAAC', 'CKSAAP', 'DPC', 'DDE', 'TPC', 'binary',
    'GAAC', 'EGAAC', 'CKSAAGP', 'GDPC', 'GTPC',
    'AAINDEX', 'ZSCALE', 'BLOSUM62',
    'NMBroto', 'Moran', 'Geary',
    'CTDC', 'CTDT', 'CTDD',
    'CTriad', 'KSCTriad',
    'SOCNumber', 'QSOrder',
    'PAAC', 'APAAC',
    'KNNprotein', 'KNNpeptide',
    'PSSM', 'SSEC', 'SSEB', 'Disorder', 'DisorderC', 'DisorderB', 'ASA', 'TA',
]


def build_parser():
    """Build the command-line parser for the descriptor generator.

    Returns:
        argparse.ArgumentParser: parser exposing --file, --method, --path,
        --order, --userDefinedOrder, --format and --out.
    """
    # No custom ``usage`` string: argparse then generates an accurate usage
    # line from the arguments declared below.
    parser = argparse.ArgumentParser(
        description="Generating various numerical representation schemes for protein sequences")
    parser.add_argument("--file", required=True, help="input fasta file")
    parser.add_argument("--method", required=True, choices=ENCODING_METHODS,
                        help="the encoding type")
    parser.add_argument("--path", dest='filePath',
                        help="data file path used for 'PSSM', 'SSEB(C)', 'Disorder(BC)', 'ASA' and 'TA' encodings")
    parser.add_argument("--order", dest='order',
                        choices=['alphabetically', 'polarity', 'sideChainVolume', 'userDefined'],
                        help="output order of Amino Acid Composition (i.e. AAC, EAAC, CKSAAP, DPC, DDE, TPC) descriptors")
    parser.add_argument("--userDefinedOrder", dest='userDefinedOrder',
                        help="user defined output order of Amino Acid Composition (i.e. AAC, EAAC, CKSAAP, DPC, DDE, TPC) descriptors")
    parser.add_argument("--format", choices=['csv', 'tsv', 'svm', 'weka', 'tsv_1'], default='svm',
                        help="the output format")
    parser.add_argument("--out", help="the generated descriptor file")
    return parser


def resolve_residue_order(order, user_defined_order):
    """Return the 20-letter residue order to pass to the descriptor.

    Args:
        order: value of --order ('alphabetically', 'polarity',
            'sideChainVolume', 'userDefined') or None when not given.
        user_defined_order: value of --userDefinedOrder, or None.

    Returns:
        str: a 20-character string of amino-acid letters. Falls back to the
        alphabetical order when ``order`` is None, or when 'userDefined' is
        requested but the supplied string is not a valid ordering.

    Raises:
        KeyError: if ``order`` is not None and not one of the known names.
    """
    if order is None:
        return DEFAULT_AA_ORDER
    if order != 'userDefined':
        return PREDEFINED_AA_ORDERS[order]

    # Drop every character that is not one of the 20 standard residue letters.
    cleaned = re.sub('[^ACDEFGHIKLMNPQRSTVWY]', '', user_defined_order or '')
    # A usable order names each residue exactly once; a length check alone
    # would let strings with repeated letters (and thus missing ones) through.
    if len(cleaned) != 20 or len(set(cleaned)) != 20:
        return DEFAULT_AA_ORDER
    return cleaned


def main(argv=None):
    """Parse arguments, compute the requested descriptor and save it.

    Args:
        argv: optional list of argument strings; None makes argparse read
            them from sys.argv.
    """
    args = build_parser().parse_args(argv)
    fastas = read_fasta_sequences.read_protein_sequences(args.file)
    kw = {
        'path': args.filePath,
        'order': resolve_residue_order(args.order, args.userDefinedOrder),
        'type': 'Protein',
    }
    print('Descriptor type: ' + args.method)

    # Explicit lookup instead of eval() on a built string: the descriptor
    # module is expected to be in this module's namespace via the star
    # imports at the top of the file, and to expose a function named after
    # itself (e.g. AAC.AAC).
    descriptor_module = globals()[args.method]
    encode = getattr(descriptor_module, args.method)
    encodings = encode(fastas, **kw)

    out_file = args.out if args.out is not None else 'encoding.txt'
    save_file.save_file(encodings, args.format, out_file)


if __name__ == '__main__':
    main()