Skip to content

Commit 25c3c95

Browse files
committed
fix bugs in paired-end pattern
1 parent f442b1d commit 25c3c95

File tree

3 files changed

+38
-1208
lines changed

3 files changed

+38
-1208
lines changed

fcirc.py

+37-35
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def version():
2121
def usage():
2222
usage_string = '''
2323
Program: fcirc (fusion circRNA identifier)
24-
Version: 1.0.1(written by python3)
24+
Version: 1.0.2(written by python3)
2525
Contact: Zhaoqing Cai <[email protected]> HongZhang Xue <[email protected]>
2626
2727
Usage: python fcirc.py [options] -x <ht2-trans-idx> -f <ht2-fusion-idx-dir> {-1 <fastq1> | -1 <fastq1> -2 <fastq2>}
@@ -32,17 +32,17 @@ def usage():
3232
transcription index filename prefix (minus trailing .X.ht2)
3333
-f <ht2-fusion-idx-dir>, --fusion_idx_dir <ht2-fusion-idx-dir>
3434
fusion index directory (contains fusiongenes_ref_U and fusiongenes_ref_V)
35-
-c <fusion-genes-coordinates> --fusion_genes_coord
36-
fusion genes coordinates file(defalut: fusion_genes_coordinate.txt)
3735
-1 <fastq1>, --file1 <fastq1>
3836
fastq file 1 (single-end pattern:only -1)
3937
-2 <fastq2>, --file2 <fastq2>
4038
fastq file 2 (paired-end pattern:-1 and -2, files should be like -1 xxx_1.fastq -2 xxx_2.fastq)
4139
4240
Optional:
43-
-q <quality_val>
41+
-q <quality_val>
4442
the minimum phred qulaity of read(default:0)
45-
-o <output_dir>, --output <outout_dir>
43+
-c <fusion-genes-coordinates> --fusion_genes_coord
44+
fusion genes coordinates file(defalut: fusion_genes_coordinate.txt in fusion index directory)
45+
-o <output_dir>, --output <output_dir>
4646
output file directory (default: .)
4747
-t <int>, --thread <int>
4848
number of hisat2 alignment and pysam filter threads to launch (default:1)
@@ -90,11 +90,7 @@ def main(argvs):
9090
trans_idx_path = ''
9191
fusion_idx_dir_path = ''
9292
pattern = "single-end"
93-
software_dir = os.path.split(os.path.abspath(sys.argv[0]))[0]
94-
fusion_genes_coord = software_dir + '/' + 'fusion_total_index/fusion_genes_coordinate.txt'
95-
# if not os.path.exists(Genes_coord):
96-
# print('ERROR in finding Genes_Coordinate.txt,Please check Genes_Coordinate.txt exit in fcirc.py directory')
97-
# sys.exit(1)
93+
9894

9995
for opt, arg in opts:
10096
if opt in ('-h', '--help'):
@@ -129,16 +125,18 @@ def main(argvs):
129125
if os.path.isdir(abs_arg):
130126
fusion_idx_dir_path = abs_arg.rstrip('/')
131127
elif fusion_idx_dir_path == '':
132-
print("Error:Fail to locate hisat2 fusion index prefix,please check {path}".format(
128+
print("Error:Fail to locate hisat2 fusion index prefix, please check {path}".format(
133129
path=fusion_idx_dir_path))
134130
sys.exit(1)
135131

132+
fusion_genes_coord = os.path.join(fusion_idx_dir_path, 'fusion_genes_coordinate.txt')
133+
136134
if opt in ('-c', '--fusion_genes_coord'):
137135
abs_arg = abs_path(arg)
138136
if os.path.isfile(abs_arg):
139137
fusion_genes_coord = abs_arg
140138
else:
141-
print('Error to locate fusion genes coordinate file,Please check{path}'.format(path=fusion_genes_coord))
139+
print('Error to locate fusion genes coordinate file, please check{path}'.format(path=fusion_genes_coord))
142140
sys.exit(1)
143141

144142
if opt == '-q':
@@ -190,6 +188,7 @@ def main(argvs):
190188
output_translog="temp/trans_" + projectname + ".log")
191189
return_state = os.system(hisat2_trans_command)
192190
if return_state != 0:
191+
print("[{now}] Error mapping reads to transcription!".format(now=str_current_time()))
193192
exit(return_state)
194193

195194
print("[{now}] Finish mapping reads to transcription!".format(now=str_current_time()))
@@ -335,14 +334,15 @@ def main(argvs):
335334
hisat2_trans_idx=trans_idx_path,
336335
mate1fastq=fastq1_path,
337336
mate2fastq=fastq2_path,
338-
output_transsam=output_path + "/temp/trans_" + projectname + ".sam",
339-
output_translog=output_path + "/temp/trans_" + projectname + ".log")
337+
output_transsam="temp/trans_" + projectname + ".sam",
338+
output_translog="temp/trans_" + projectname + ".log")
340339
return_state = os.system(hisat2_trans_command)
341340
if return_state != 0:
341+
print("[{now}] Error: Fail mapping reads to transcription!".format(now=str_current_time()))
342342
exit(return_state)
343343
print("[{now}] Finish mapping reads to transcription!".format(now=str_current_time()))
344344

345-
unmapped_fastq1_path, unmapped_fastq2_path = drop_mapped_paired_tofastq(output_path + "temp/trans_" + projectname + ".sam")
345+
unmapped_fastq1_path, unmapped_fastq2_path = drop_mapped_paired_tofastq("temp/trans_" + projectname + ".sam")
346346
if quality > 0:
347347
quality_cmd = quality_filter_PE(unmapped_fastq1_path, unmapped_fastq2_path, quality, 'pair-end')
348348
if quality_cmd != 0:
@@ -363,10 +363,11 @@ def main(argvs):
363363
hisat2_fusion_idx=fusion_idx_dir_path,
364364
mate1fastq=unmapped_fastq1_path,
365365
mate2fastq=unmapped_fastq2_path,
366-
output_fusionsam=output_path + "/temp/fusionU_" + projectname + ".sam",
367-
output_fusionlog=output_path + "/temp/fusionU_" + projectname + ".log")
366+
output_fusionsam="temp/fusionU_" + projectname + ".sam",
367+
output_fusionlog="temp/fusionU_" + projectname + ".log")
368368
return_state = os.system(hisat2_fusionU_command)
369369
if return_state != 0:
370+
print("[{now}] Error: Fail mapping reads to fusion references U!".format(now=str_current_time()))
370371
exit(return_state)
371372
print("[{now}] Finish mapping reads to fusion references U!".format(now=str_current_time()))
372373

@@ -384,15 +385,16 @@ def main(argvs):
384385
hisat2_fusion_idx=fusion_idx_dir_path,
385386
mate1fastq=unmapped_fastq1_path,
386387
mate2fastq=unmapped_fastq2_path,
387-
output_fusionsam=output_path + "/temp/fusionV_" + projectname + ".sam",
388-
output_fusionlog=output_path + "/temp/fusionV_" + projectname + ".log")
388+
output_fusionsam="temp/fusionV_" + projectname + ".sam",
389+
output_fusionlog="temp/fusionV_" + projectname + ".log")
389390
return_state = os.system(hisat2_fusionV_command)
390391
if return_state != 0:
392+
print("[{now}] Error: Fail mapping reads to fusion references V!".format(now=str_current_time()))
391393
exit(return_state)
392394
print("[{now}] Finish mapping reads to fusion references V!".format(now=str_current_time()))
393395

394-
mapped_samU_path = drop_unmapped_paired_tosam(output_path + "temp/fusionU_" + projectname + ".sam", thread=str(thread))
395-
mapped_samV_path = drop_unmapped_paired_tosam(output_path + "temp/fusionV_" + projectname + ".sam", thread=str(thread))
396+
mapped_samU_path = drop_unmapped_paired_tosam("temp/fusionU_" + projectname + ".sam", thread=str(thread))
397+
mapped_samV_path = drop_unmapped_paired_tosam("temp/fusionV_" + projectname + ".sam", thread=str(thread))
396398

397399
mapped_filtered_samU_path, mapped_filtered_samV_path = getpairedreads(mapped_samU_path, mapped_samV_path,
398400
pattern, fusion_idx_dir_path)
@@ -406,7 +408,7 @@ def main(argvs):
406408
exit(return_state)
407409

408410
# reads both exists in U and V
409-
sam_paired_tofastq(mapped_filtered_samV_path, output_path + "temp/inferred_fusion_1.fastq", output_path + "temp/inferred_fusion_2.fastq")
411+
sam_paired_tofastq(mapped_filtered_samV_path, "temp/inferred_fusion_1.fastq", "temp/inferred_fusion_2.fastq")
410412

411413
hisat2_verify_fusion_command = '''hisat2 -p {thread} \
412414
--mp 6,2 \
@@ -419,25 +421,25 @@ def main(argvs):
419421
-2 {inferred_fusion2_fastq} \
420422
-S {output_inferred_fusionsam} \
421423
2>{output_inferred_fusionlog}'''.format(thread=str(thread),
422-
inferred_fusion1_fastq=output_path + "temp/inferred_fusion_1.fastq",
423-
inferred_fusion2_fastq=output_path + "temp/inferred_fusion_2.fastq",
424-
output_inferred_fusionsam=output_path + "temp/inferred_fusion.sam",
425-
output_inferred_fusionlog=output_path + "temp/inferred_fusion.log")
424+
inferred_fusion1_fastq="temp/inferred_fusion_1.fastq",
425+
inferred_fusion2_fastq="temp/inferred_fusion_2.fastq",
426+
output_inferred_fusionsam="temp/inferred_fusion.sam",
427+
output_inferred_fusionlog="temp/inferred_fusion.log")
426428
return_state = os.system(hisat2_verify_fusion_command)
427429
if return_state == -1:
428430
print("[{now}] No fusion gene is detected!".format(now=str_current_time()))
429431
exit(return_state)
430432
print("[{now}] Finish mapping reads to inferred fusion references!".format(now=str_current_time()))
431433

432434
# check the distribution near the fusion break point
433-
fusion_result = write_fusion(output_path + "temp/inferred_fusion.sam")
435+
fusion_result = write_fusion("temp/inferred_fusion.sam")
434436
if fusion_result == -1:
435437
exit(return_state)
436438

437439
# filter and get the circ-related reads
438-
transform_sam(output_path + "temp/inferred_fusion.sam")
439-
sam_single_tofastq(output_path + "temp/inferred_fusion_tranformed.sam", output_path + "temp/inferred_fusion_tranformed.fastq")
440-
sam_single_tofastq(output_path + "temp/inferred_fusion_tranformed.sam", output_path + "temp/inferred_fusion_tranformed.fastq")
440+
transform_sam("temp/inferred_fusion.sam")
441+
sam_single_tofastq("temp/inferred_fusion_tranformed.sam", "temp/inferred_fusion_tranformed.fastq")
442+
sam_single_tofastq("temp/inferred_fusion_tranformed.sam", "temp/inferred_fusion_tranformed.fastq")
441443

442444
hisat2_fusion_circ_command = '''hisat2 -p {thread} \
443445
--no-softclip \
@@ -446,15 +448,15 @@ def main(argvs):
446448
-U {inferred_fusion_circ_fastq} \
447449
-S {output_inferred_fusion_circsam} \
448450
2>{output_inferred_fusion_circlog}'''.format(thread=str(thread),
449-
inferred_fusion_circ_fastq=output_path + "temp/inferred_fusion_tranformed.fastq",
450-
output_inferred_fusion_circsam=output_path + "temp/inferred_fusion_circ.sam",
451-
output_inferred_fusion_circlog=output_path + "temp/inferred_fusion_circ.log")
451+
inferred_fusion_circ_fastq="temp/inferred_fusion_tranformed.fastq",
452+
output_inferred_fusion_circsam="temp/inferred_fusion_circ.sam",
453+
output_inferred_fusion_circlog="temp/inferred_fusion_circ.log")
452454
return_state = os.system(hisat2_fusion_circ_command)
453455
if return_state != 0:
454456
exit(return_state)
455457
# count input reads
456-
readcount = count_reads(output_path + "temp/trans_" + projectname + ".sam")
457-
return_state = write_fcirc(output_path + "temp/inferred_fusion_circ.sam", readcount, fusion_result)
458+
readcount = count_reads("temp/trans_" + projectname + ".sam")
459+
return_state = write_fcirc("temp/inferred_fusion_circ.sam", readcount, fusion_result)
458460
if return_state == -1:
459461
print("[{now}] No fusion circRNA is detected!".format(now=str_current_time()))
460462
exit(return_state)

readme.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ python fcirc.py -t 4 -o fcirc_out -x transcriptome_HISAT2_index_path -f known_fu
149149
```
150150
It costs few minutes. If it runs successfully, some log information will be printed as following:
151151
```
152-
[2022-01-13 21:48:19] Start running # fcirc/fcirc.py -t 1 -o fcirc_test -x ref/grch38_tran/genome_tran -f fcirc/fusion_total_index/ -1 fcirc/test_data/test.fastq
152+
[2022-01-13 21:48:19] Start running # fcirc/fcirc.py -t 1 -o fcirc_test -x ref/grch38_tran/genome_tran -f fcirc/fusion_total_index/ -1 fcirc/test_data/test.fastq.gz
153153
[2022-01-13 21:48:27] Finish mapping reads to transcription!
154154
[2022-01-13 21:48:27] Finish mapping reads to fusion references U!
155155
[2022-01-13 21:48:27] Finish mapping reads to fusion references V!

0 commit comments

Comments
 (0)