Skip to content

Commit 8c375a4

Browse files
committed
:fix: fix read_infernal see #114
When an attC site starts at the first position and the model is truncated, the coordinates are wrong (negative); see #114
1 parent 75d7b58 commit 8c375a4

File tree

2 files changed

+23
-9
lines changed

2 files changed

+23
-9
lines changed

integron_finder/infernal.py

+19-8
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@
3838
_log = colorlog.getLogger(__name__)
3939

4040

41-
def read_infernal(infile, replicon_id, len_model_attc,
41+
def read_infernal(infile, replicon_id, replicon_size,
42+
len_model_attc,
4243
evalue=1, size_max_attc=200, size_min_attc=40):
4344
"""
4445
Function that parse cmsearch --tblout output and returns a pandas DataFrame
@@ -92,11 +93,19 @@ def read_infernal(infile, replicon_id, len_model_attc,
9293
df.sort_values(['pos_end_tmp', 'evalue'], inplace=True)
9394
df.index = list(range(0, len(df)))
9495
idx = (df.pos_beg_tmp > df.pos_end_tmp)
95-
df.loc[idx, "pos_beg"] = df.loc[idx].apply(lambda x: x["pos_end_tmp"] - (len_model_attc - x["cm_fin"]), axis=1)
96-
df.loc[idx, "pos_end"] = df.loc[idx].apply(lambda x: x["pos_beg_tmp"] + (x["cm_debut"] - 1), axis=1)
97-
98-
df.loc[~idx, "pos_end"] = df.loc[~idx].apply(lambda x: x["pos_end_tmp"] + (len_model_attc - x["cm_fin"]), axis=1)
99-
df.loc[~idx, "pos_beg"] = df.loc[~idx].apply(lambda x: x["pos_beg_tmp"] - (x["cm_debut"] - 1), axis=1)
96+
df.loc[idx, "pos_beg"] = df.loc[idx].apply(lambda x: max(x["pos_end_tmp"] - (len_model_attc - x["cm_fin"]),
97+
0),
98+
axis=1)
99+
df.loc[idx, "pos_end"] = df.loc[idx].apply(lambda x: min(x["pos_beg_tmp"] + (x["cm_debut"] - 1),
100+
replicon_size),
101+
axis=1)
102+
103+
df.loc[~idx, "pos_beg"] = df.loc[~idx].apply(lambda x: max(x["pos_beg_tmp"] - (x["cm_debut"] - 1),
104+
0)
105+
, axis=1)
106+
df.loc[~idx, "pos_end"] = df.loc[~idx].apply(lambda x: min(x["pos_end_tmp"] + (len_model_attc - x["cm_fin"]),
107+
replicon_size)
108+
, axis=1)
100109

101110
df = df[["Accession_number", "cm_attC", "cm_debut", "cm_fin", "pos_beg", "pos_end", "sens", "evalue"]]
102111
df["cm_attC"] = df["cm_attC"].str.lower()
@@ -198,7 +207,7 @@ def local_max(replicon,
198207
cmsearch_cmd = \
199208
'{bin} -Z {size} {strand} --max --cpu {cpu} -A {out} --tblout {tblout} -E 10 ' \
200209
'--incE {incE} {mod_attc_path} {infile}'.format(bin=cmsearch_bin.replace(' ', '\\ '),
201-
size=replicon_size / 1000000.,
210+
size=replicon_size / 1000000., # search space size in *Mb*
202211
strand={"both": "",
203212
"top": "--toponly",
204213
"bottom": "--bottomonly"}[strand_search],
@@ -217,7 +226,9 @@ def local_max(replicon,
217226
if completed_process.returncode != 0:
218227
raise RuntimeError(f"{cmsearch_cmd} failed returncode = {completed_process.returncode}")
219228
df_max = read_infernal(tblout_path,
220-
replicon.id, model_len(model_attc_path),
229+
replicon.id,
230+
replicon_size,
231+
model_len(model_attc_path),
221232
evalue=evalue_attc,
222233
size_max_attc=max_attc_size,
223234
size_min_attc=min_attc_size)

integron_finder/integron.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,10 @@ def find_integron(replicon, prot_db, intI_file, phageI_file, cfg, attc_file=None
110110
elif attc_file:
111111
# it call after default search
112112
local_max_done = False
113-
attc = read_infernal(attc_file, replicon.id, cfg.model_len,
113+
attc = read_infernal(attc_file,
114+
replicon.id,
115+
len(replicon),
116+
cfg.model_len,
114117
evalue=cfg.evalue_attc,
115118
size_max_attc=cfg.max_attc_size,
116119
size_min_attc=cfg.min_attc_size)

0 commit comments

Comments
 (0)