38
38
_log = colorlog .getLogger (__name__ )
39
39
40
40
41
- def read_infernal (infile , replicon_id , len_model_attc ,
41
+ def read_infernal (infile , replicon_id , replicon_size ,
42
+ len_model_attc ,
42
43
evalue = 1 , size_max_attc = 200 , size_min_attc = 40 ):
43
44
"""
44
45
Function that parses cmsearch --tblout output and returns a pandas DataFrame
@@ -92,11 +93,19 @@ def read_infernal(infile, replicon_id, len_model_attc,
92
93
df .sort_values (['pos_end_tmp' , 'evalue' ], inplace = True )
93
94
df .index = list (range (0 , len (df )))
94
95
idx = (df .pos_beg_tmp > df .pos_end_tmp )
95
- df .loc [idx , "pos_beg" ] = df .loc [idx ].apply (lambda x : x ["pos_end_tmp" ] - (len_model_attc - x ["cm_fin" ]), axis = 1 )
96
- df .loc [idx , "pos_end" ] = df .loc [idx ].apply (lambda x : x ["pos_beg_tmp" ] + (x ["cm_debut" ] - 1 ), axis = 1 )
97
-
98
- df .loc [~ idx , "pos_end" ] = df .loc [~ idx ].apply (lambda x : x ["pos_end_tmp" ] + (len_model_attc - x ["cm_fin" ]), axis = 1 )
99
- df .loc [~ idx , "pos_beg" ] = df .loc [~ idx ].apply (lambda x : x ["pos_beg_tmp" ] - (x ["cm_debut" ] - 1 ), axis = 1 )
96
+ df .loc [idx , "pos_beg" ] = df .loc [idx ].apply (lambda x : max (x ["pos_end_tmp" ] - (len_model_attc - x ["cm_fin" ]),
97
+ 0 ),
98
+ axis = 1 )
99
+ df .loc [idx , "pos_end" ] = df .loc [idx ].apply (lambda x : min (x ["pos_beg_tmp" ] + (x ["cm_debut" ] - 1 ),
100
+ replicon_size ),
101
+ axis = 1 )
102
+
103
+ df .loc [~ idx , "pos_beg" ] = df .loc [~ idx ].apply (lambda x : max (x ["pos_beg_tmp" ] - (x ["cm_debut" ] - 1 ),
104
+ 0 )
105
+ , axis = 1 )
106
+ df .loc [~ idx , "pos_end" ] = df .loc [~ idx ].apply (lambda x : min (x ["pos_end_tmp" ] + (len_model_attc - x ["cm_fin" ]),
107
+ replicon_size )
108
+ , axis = 1 )
100
109
101
110
df = df [["Accession_number" , "cm_attC" , "cm_debut" , "cm_fin" , "pos_beg" , "pos_end" , "sens" , "evalue" ]]
102
111
df ["cm_attC" ] = df ["cm_attC" ].str .lower ()
@@ -198,7 +207,7 @@ def local_max(replicon,
198
207
cmsearch_cmd = \
199
208
'{bin} -Z {size} {strand} --max --cpu {cpu} -A {out} --tblout {tblout} -E 10 ' \
200
209
'--incE {incE} {mod_attc_path} {infile}' .format (bin = cmsearch_bin .replace (' ' , '\\ ' ),
201
- size = replicon_size / 1000000. ,
210
+ size = replicon_size / 1000000. , # search space size in *Mb*
202
211
strand = {"both" : "" ,
203
212
"top" : "--toponly" ,
204
213
"bottom" : "--bottomonly" }[strand_search ],
@@ -217,7 +226,9 @@ def local_max(replicon,
217
226
if completed_process .returncode != 0 :
218
227
raise RuntimeError (f"{ cmsearch_cmd } failed returncode = { completed_process .returncode } " )
219
228
df_max = read_infernal (tblout_path ,
220
- replicon .id , model_len (model_attc_path ),
229
+ replicon .id ,
230
+ replicon_size ,
231
+ model_len (model_attc_path ),
221
232
evalue = evalue_attc ,
222
233
size_max_attc = max_attc_size ,
223
234
size_min_attc = min_attc_size )
0 commit comments