Skip to content

Commit b70479b

Browse files
committed
✅ add unit test for read_infernal
Test the case where the attc site model is found on the first or last position and the model is truncated; check that seq beg and end are correct. See #114
1 parent 690f254 commit b70479b

File tree

2 files changed

+66
-70
lines changed

2 files changed

+66
-70
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#target name accession query name accession mdl mdl from mdl to seq from seq to strand trunc pass gc bias score E-value inc description of target
2+
#------------------- --------- -------------------- --------- --- -------- -------- -------- -------- ------ ----- ---- ---- ----- ------ --------- --- ---------------------
3+
37 - attC_4 - cm 4 44 123 1 - no 1 0.55 0.0 16.8 0.0024 ! -
4+
37 - attC_4 - cm 1 41 3065 3109 + no 1 0.53 0.0 16.8 0.0023 ! -
5+
#
6+
# Program: cmsearch
7+
# edited by hand to simulate truncated attc model on first and last pos
8+

tests/test_read_infernal.py

+58-70
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def setUp(self):
4848
"""
4949
self.replicon_name = "acba.007.p01.13"
5050
self.replicon_id = "ACBA.007.P01_13"
51+
self.replicon_size = 20301
5152
self.length_cm = 47 # length in 'CLEN' (value for model attc_4.cm)
5253
self.dtype = {"Accession_number": "str",
5354
"cm_attC": "str",
@@ -65,7 +66,9 @@ def test_nofile(self):
6566
6667
"""
6768
filename = "infernal.txt"
68-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm)
69+
df = infernal.read_infernal(filename,
70+
self.replicon_id, self.replicon_size,
71+
self.length_cm)
6972
expect = pd.DataFrame(columns=["Accession_number", "cm_attC", "cm_debut",
7073
"cm_fin", "pos_beg", "pos_end", "sens", "evalue"])
7174
expect = expect.astype(self.dtype)
@@ -77,7 +80,9 @@ def test_nohit(self):
7780
inside, it returns an empty dataframe.
7881
"""
7982
filename = self.find_data(os.path.join("fictive_results", "{}_attc_table-empty.res".format(self.replicon_id)))
80-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm)
83+
df = infernal.read_infernal(filename,
84+
self.replicon_id, self.replicon_size,
85+
self.length_cm)
8186
expect = pd.DataFrame(columns=["Accession_number", "cm_attC", "cm_debut",
8287
"cm_fin", "pos_beg", "pos_end", "sens", "evalue"])
8388
expect = expect.astype(self.dtype)
@@ -92,7 +97,9 @@ def test_evalue_thres(self):
9297
filename = self.find_data(os.path.join("Results_Integron_Finder_{}".format(self.replicon_name),
9398
"tmp_{}".format(self.replicon_id),
9499
"{}_attc_table.res".format(self.replicon_id)))
95-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm, evalue=1e-10)
100+
df = infernal.read_infernal(filename,
101+
self.replicon_id, self.replicon_size,
102+
self.length_cm, evalue=1e-10)
96103
expect = pd.DataFrame(columns=["Accession_number", "cm_attC", "cm_debut",
97104
"cm_fin", "pos_beg", "pos_end", "sens", "evalue"])
98105
expect = expect.astype(self.dtype)
@@ -107,7 +114,9 @@ def test_generate_df(self):
107114
filename = self.find_data(os.path.join("Results_Integron_Finder_{}".format(self.replicon_name),
108115
"tmp_{}".format(self.replicon_id),
109116
"{}_attc_table.res".format(self.replicon_id)))
110-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm)
117+
df = infernal.read_infernal(filename,
118+
self.replicon_id, self.replicon_size,
119+
self.length_cm)
111120
expect = pd.DataFrame({"Accession_number": [self.replicon_id, self.replicon_id, self.replicon_id,],
112121
"cm_attC": ["attc_4", "attc_4", "attc_4",],
113122
"cm_debut": [1, 1, 1],
@@ -126,7 +135,9 @@ def test_attcsize_minthres(self):
126135
filename = self.find_data(os.path.join("Results_Integron_Finder_{}".format(self.replicon_name),
127136
"tmp_{}".format(self.replicon_id),
128137
"{}_attc_table.res".format(self.replicon_id)))
129-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm, size_min_attc=60)
138+
df = infernal.read_infernal(filename,
139+
self.replicon_id, self.replicon_size,
140+
self.length_cm, size_min_attc=60)
130141
expect = pd.DataFrame({"Accession_number": [self.replicon_id, self.replicon_id],
131142
"cm_attC": ["attc_4", "attc_4"],
132143
"cm_debut": [1, 1],
@@ -146,7 +157,9 @@ def test_attcsize_maxthres(self):
146157
filename = self.find_data(os.path.join("Results_Integron_Finder_{}".format(self.replicon_name),
147158
"tmp_{}".format(self.replicon_id),
148159
"{}_attc_table.res".format(self.replicon_id)))
149-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm, size_max_attc=100)
160+
df = infernal.read_infernal(filename,
161+
self.replicon_id, self.replicon_size,
162+
self.length_cm, size_max_attc=100)
150163
expect = pd.DataFrame({"Accession_number": [self.replicon_id, self.replicon_id],
151164
"cm_attC": ["attc_4", "attc_4"],
152165
"cm_debut": [1, 1],
@@ -165,11 +178,18 @@ def test_filter_evalue_thres(self):
165178
filename = self.find_data(os.path.join("Results_Integron_Finder_{}".format(self.replicon_name),
166179
"tmp_{}".format(self.replicon_id),
167180
"{}_attc_table.res".format(self.replicon_id)))
168-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm, evalue=1e-8)
169-
expect = pd.DataFrame({"Accession_number": self.replicon_id, "cm_attC": "attc_4",
170-
"cm_debut": 1, "cm_fin": 47, "pos_beg": 17825,
171-
"pos_end": 17884, "sens": "-", "evalue": 1e-9},
172-
ignore_index=True)
181+
df = infernal.read_infernal(filename,
182+
self.replicon_id, self.replicon_size,
183+
self.length_cm, evalue=1e-8)
184+
expect = pd.DataFrame({"Accession_number": [self.replicon_id],
185+
"cm_attC": ["attc_4"],
186+
"cm_debut": [1],
187+
"cm_fin": [47],
188+
"pos_beg": [17825],
189+
"pos_end": [17884],
190+
"sens": ["-"],
191+
"evalue": [1e-9]}
192+
)
173193
expect = expect.astype(self.dtype)
174194
pdt.assert_frame_equal(df, expect)
175195

@@ -178,9 +198,12 @@ def test_no_total_cm_match_strandp(self):
178198
Test that when the model did not completely match on the sequence,
179199
the start and end positions of hit are well recalculated. All hits are on strand +
180200
"""
181-
filename = self.find_data(
182-
os.path.join("fictive_results", "{}_attc_table-partial.res".format(self.replicon_id)))
183-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm)
201+
filename = self.find_data("fictive_results",
202+
f"{self.replicon_id}_attc_table-partial.res")
203+
204+
df = infernal.read_infernal(filename,
205+
self.replicon_id, self.replicon_size,
206+
self.length_cm)
184207
expect = pd.DataFrame({"Accession_number": [self.replicon_id, self.replicon_id, self.replicon_id],
185208
"cm_attC": ["attc_4", "attc_4", "attc_4"],
186209
"cm_debut": [1, 1, 10],
@@ -199,9 +222,11 @@ def test_no_total_cm_match_strandm(self):
199222
"""
200223
filename = self.find_data(
201224
os.path.join("fictive_results", "{}_attc_table-partialm.res".format(self.replicon_id)))
202-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm)
225+
df = infernal.read_infernal(filename,
226+
self.replicon_id, self.replicon_size,
227+
self.length_cm)
203228
expect = pd.DataFrame({"Accession_number": [self.replicon_id, self.replicon_id, self.replicon_id],
204-
"cm_attC": ["attc_4", "attc_4", "attc_4", ],
229+
"cm_attC": ["attc_4", "attc_4", "attc_4"],
205230
"cm_debut": [1, 1, 10],
206231
"cm_fin": [40, 47, 47],
207232
"pos_beg": [17818, 19080, 19618],
@@ -212,61 +237,24 @@ def test_no_total_cm_match_strandm(self):
212237
expect = expect.astype(self.dtype)
213238
pdt.assert_frame_equal(df, expect)
214239

215-
def test_attcsize_minthres(self):
216-
"""
217-
Test that the filter by a minimum attc size works.
218-
"""
219-
filename = self.find_data(os.path.join("Results_Integron_Finder_{}".format(self.replicon_name),
220-
"tmp_{}".format(self.replicon_id),
221-
"{}_attc_table.res".format(self.replicon_id)))
222-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm, size_min_attc=60)
223-
expect = pd.DataFrame({"Accession_number": [self.replicon_id, self.replicon_id],
224-
"cm_attC": ["attc_4", "attc_4"],
225-
"cm_debut": [1, 1],
226-
"cm_fin": [47, 47],
227-
"pos_beg": [19080, 19618],
228-
"pos_end": [19149, 19726],
229-
"sens": ["-", "-"],
230-
"evalue": [1e-4, 1.1e-7]})
231-
expect = expect.astype(self.dtype)
232-
pdt.assert_frame_equal(df, expect)
240+
def test_attc_overflow_pos(self):
241+
"""test when model is truncated and on very first or last replicon pos"""
242+
filename = self.find_data('fictive_results', '37_0_200_subseq_overflow_attc_table.res')
243+
replicon_id = '37'
244+
replicon_size = 3109
245+
model_len = 47
246+
df = infernal.read_infernal(filename,
247+
replicon_id, replicon_size,
248+
model_len)
233249

234-
def test_attcsize_maxthres(self):
235-
"""
236-
Test that the filter by a maximum attc size works.
237-
"""
238-
filename = self.find_data(os.path.join("Results_Integron_Finder_{}".format(self.replicon_name),
239-
"tmp_{}".format(self.replicon_id),
240-
"{}_attc_table.res".format(self.replicon_id)))
241-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm, size_max_attc=100)
242-
expect = pd.DataFrame({"Accession_number": [self.replicon_id, self.replicon_id],
250+
expect = pd.DataFrame({"Accession_number": [replicon_id, replicon_id],
243251
"cm_attC": ["attc_4", "attc_4"],
244-
"cm_debut": [1, 1],
245-
"cm_fin": [47, 47],
246-
"pos_beg": [17825, 19080],
247-
"pos_end": [17884, 19149],
248-
"sens": ["-", "-"],
249-
"evalue": [1e-9, 1e-4]})
250-
expect = expect.astype(self.dtype)
251-
pdt.assert_frame_equal(df, expect)
252+
"cm_debut": [4, 1],
253+
"cm_fin": [44, 41],
254+
"pos_beg": [1, 3065],
255+
"pos_end": [126, 3109],
256+
"sens": ["-", "+"],
257+
"evalue": [0.0024,0.0023]})
252258

253-
def test_filter_evalue_thres(self):
254-
"""
255-
Test that the filter by a maximum attc size works.
256-
"""
257-
filename = self.find_data(os.path.join("Results_Integron_Finder_{}".format(self.replicon_name),
258-
"tmp_{}".format(self.replicon_id),
259-
"{}_attc_table.res".format(self.replicon_id)))
260-
df = infernal.read_infernal(filename, self.replicon_id, self.length_cm, evalue=1e-8)
261-
expect =pd.DataFrame({"Accession_number": self.replicon_id,
262-
"cm_attC": "attc_4",
263-
"cm_debut": 1,
264-
"cm_fin": 47,
265-
"pos_beg": 17825,
266-
"pos_end": 17884,
267-
"sens": "-",
268-
"evalue": 1e-9},
269-
index=[0])
270259
expect = expect.astype(self.dtype)
271-
pdt.assert_frame_equal(df, expect)
272-
260+
pdt.assert_frame_equal(df, expect)

0 commit comments

Comments
 (0)