@@ -21,7 +21,7 @@ def version():
21
21
def usage ():
22
22
usage_string = '''
23
23
Program: fcirc (fusion circRNA identifier)
24
- Version: 1.0.1 (written by python3)
24
+ Version: 1.0.2 (written by python3)
25
25
Contact: Zhaoqing Cai <[email protected] > HongZhang Xue <[email protected] >
26
26
27
27
Usage: python fcirc.py [options] -x <ht2-trans-idx> -f <ht2-fusion-idx-dir> {-1 <fastq1> | -1 <fastq1> -2 <fastq2>}
@@ -32,17 +32,17 @@ def usage():
32
32
transcription index filename prefix (minus trailing .X.ht2)
33
33
-f <ht2-fusion-idx-dir>, --fusion_idx_dir <ht2-fusion-idx-dir>
34
34
fusion index directory (contains fusiongenes_ref_U and fusiongenes_ref_V)
35
- -c <fusion-genes-coordinates> --fusion_genes_coord
36
- fusion genes coordinates file(defalut: fusion_genes_coordinate.txt)
37
35
-1 <fastq1>, --file1 <fastq1>
38
36
fastq file 1 (single-end pattern:only -1)
39
37
-2 <fastq2>, --file2 <fastq2>
40
38
fastq file 2 (paired-end pattern:-1 and -2, files should be like -1 xxx_1.fastq -2 xxx_2.fastq)
41
39
42
40
Optional:
43
- -q <quality_val>
41
+ -q <quality_val>
44
42
the minimum phred qulaity of read(default:0)
45
- -o <output_dir>, --output <outout_dir>
43
+ -c <fusion-genes-coordinates> --fusion_genes_coord
44
+ fusion genes coordinates file(defalut: fusion_genes_coordinate.txt in fusion index directory)
45
+ -o <output_dir>, --output <output_dir>
46
46
output file directory (default: .)
47
47
-t <int>, --thread <int>
48
48
number of hisat2 alignment and pysam filter threads to launch (default:1)
@@ -90,11 +90,7 @@ def main(argvs):
90
90
trans_idx_path = ''
91
91
fusion_idx_dir_path = ''
92
92
pattern = "single-end"
93
- software_dir = os .path .split (os .path .abspath (sys .argv [0 ]))[0 ]
94
- fusion_genes_coord = software_dir + '/' + 'fusion_total_index/fusion_genes_coordinate.txt'
95
- # if not os.path.exists(Genes_coord):
96
- # print('ERROR in finding Genes_Coordinate.txt,Please check Genes_Coordinate.txt exit in fcirc.py directory')
97
- # sys.exit(1)
93
+
98
94
99
95
for opt , arg in opts :
100
96
if opt in ('-h' , '--help' ):
@@ -129,16 +125,18 @@ def main(argvs):
129
125
if os .path .isdir (abs_arg ):
130
126
fusion_idx_dir_path = abs_arg .rstrip ('/' )
131
127
elif fusion_idx_dir_path == '' :
132
- print ("Error:Fail to locate hisat2 fusion index prefix,please check {path}" .format (
128
+ print ("Error:Fail to locate hisat2 fusion index prefix, please check {path}" .format (
133
129
path = fusion_idx_dir_path ))
134
130
sys .exit (1 )
135
131
132
+ fusion_genes_coord = os .path .join (fusion_idx_dir_path , 'fusion_genes_coordinate.txt' )
133
+
136
134
if opt in ('-c' , '--fusion_genes_coord' ):
137
135
abs_arg = abs_path (arg )
138
136
if os .path .isfile (abs_arg ):
139
137
fusion_genes_coord = abs_arg
140
138
else :
141
- print ('Error to locate fusion genes coordinate file,Please check{path}' .format (path = fusion_genes_coord ))
139
+ print ('Error to locate fusion genes coordinate file, please check{path}' .format (path = fusion_genes_coord ))
142
140
sys .exit (1 )
143
141
144
142
if opt == '-q' :
@@ -190,6 +188,7 @@ def main(argvs):
190
188
output_translog = "temp/trans_" + projectname + ".log" )
191
189
return_state = os .system (hisat2_trans_command )
192
190
if return_state != 0 :
191
+ print ("[{now}] Error mapping reads to transcription!" .format (now = str_current_time ()))
193
192
exit (return_state )
194
193
195
194
print ("[{now}] Finish mapping reads to transcription!" .format (now = str_current_time ()))
@@ -335,14 +334,15 @@ def main(argvs):
335
334
hisat2_trans_idx = trans_idx_path ,
336
335
mate1fastq = fastq1_path ,
337
336
mate2fastq = fastq2_path ,
338
- output_transsam = output_path + "/ temp/trans_" + projectname + ".sam" ,
339
- output_translog = output_path + "/ temp/trans_" + projectname + ".log" )
337
+ output_transsam = " temp/trans_" + projectname + ".sam" ,
338
+ output_translog = " temp/trans_" + projectname + ".log" )
340
339
return_state = os .system (hisat2_trans_command )
341
340
if return_state != 0 :
341
+ print ("[{now}] Error: Fail mapping reads to transcription!" .format (now = str_current_time ()))
342
342
exit (return_state )
343
343
print ("[{now}] Finish mapping reads to transcription!" .format (now = str_current_time ()))
344
344
345
- unmapped_fastq1_path , unmapped_fastq2_path = drop_mapped_paired_tofastq (output_path + "temp/trans_" + projectname + ".sam" )
345
+ unmapped_fastq1_path , unmapped_fastq2_path = drop_mapped_paired_tofastq ("temp/trans_" + projectname + ".sam" )
346
346
if quality > 0 :
347
347
quality_cmd = quality_filter_PE (unmapped_fastq1_path , unmapped_fastq2_path , quality , 'pair-end' )
348
348
if quality_cmd != 0 :
@@ -363,10 +363,11 @@ def main(argvs):
363
363
hisat2_fusion_idx = fusion_idx_dir_path ,
364
364
mate1fastq = unmapped_fastq1_path ,
365
365
mate2fastq = unmapped_fastq2_path ,
366
- output_fusionsam = output_path + "/ temp/fusionU_" + projectname + ".sam" ,
367
- output_fusionlog = output_path + "/ temp/fusionU_" + projectname + ".log" )
366
+ output_fusionsam = " temp/fusionU_" + projectname + ".sam" ,
367
+ output_fusionlog = " temp/fusionU_" + projectname + ".log" )
368
368
return_state = os .system (hisat2_fusionU_command )
369
369
if return_state != 0 :
370
+ print ("[{now}] Error: Fail mapping reads to fusion references U!" .format (now = str_current_time ()))
370
371
exit (return_state )
371
372
print ("[{now}] Finish mapping reads to fusion references U!" .format (now = str_current_time ()))
372
373
@@ -384,15 +385,16 @@ def main(argvs):
384
385
hisat2_fusion_idx = fusion_idx_dir_path ,
385
386
mate1fastq = unmapped_fastq1_path ,
386
387
mate2fastq = unmapped_fastq2_path ,
387
- output_fusionsam = output_path + "/ temp/fusionV_" + projectname + ".sam" ,
388
- output_fusionlog = output_path + "/ temp/fusionV_" + projectname + ".log" )
388
+ output_fusionsam = " temp/fusionV_" + projectname + ".sam" ,
389
+ output_fusionlog = " temp/fusionV_" + projectname + ".log" )
389
390
return_state = os .system (hisat2_fusionV_command )
390
391
if return_state != 0 :
392
+ print ("[{now}] Error: Fail mapping reads to fusion references V!" .format (now = str_current_time ()))
391
393
exit (return_state )
392
394
print ("[{now}] Finish mapping reads to fusion references V!" .format (now = str_current_time ()))
393
395
394
- mapped_samU_path = drop_unmapped_paired_tosam (output_path + "temp/fusionU_" + projectname + ".sam" , thread = str (thread ))
395
- mapped_samV_path = drop_unmapped_paired_tosam (output_path + "temp/fusionV_" + projectname + ".sam" , thread = str (thread ))
396
+ mapped_samU_path = drop_unmapped_paired_tosam ("temp/fusionU_" + projectname + ".sam" , thread = str (thread ))
397
+ mapped_samV_path = drop_unmapped_paired_tosam ("temp/fusionV_" + projectname + ".sam" , thread = str (thread ))
396
398
397
399
mapped_filtered_samU_path , mapped_filtered_samV_path = getpairedreads (mapped_samU_path , mapped_samV_path ,
398
400
pattern , fusion_idx_dir_path )
@@ -406,7 +408,7 @@ def main(argvs):
406
408
exit (return_state )
407
409
408
410
# reads both exists in U and V
409
- sam_paired_tofastq (mapped_filtered_samV_path , output_path + "temp/inferred_fusion_1.fastq" , output_path + "temp/inferred_fusion_2.fastq" )
411
+ sam_paired_tofastq (mapped_filtered_samV_path , "temp/inferred_fusion_1.fastq" , "temp/inferred_fusion_2.fastq" )
410
412
411
413
hisat2_verify_fusion_command = '''hisat2 -p {thread} \
412
414
--mp 6,2 \
@@ -419,25 +421,25 @@ def main(argvs):
419
421
-2 {inferred_fusion2_fastq} \
420
422
-S {output_inferred_fusionsam} \
421
423
2>{output_inferred_fusionlog}''' .format (thread = str (thread ),
422
- inferred_fusion1_fastq = output_path + "temp/inferred_fusion_1.fastq" ,
423
- inferred_fusion2_fastq = output_path + "temp/inferred_fusion_2.fastq" ,
424
- output_inferred_fusionsam = output_path + "temp/inferred_fusion.sam" ,
425
- output_inferred_fusionlog = output_path + "temp/inferred_fusion.log" )
424
+ inferred_fusion1_fastq = "temp/inferred_fusion_1.fastq" ,
425
+ inferred_fusion2_fastq = "temp/inferred_fusion_2.fastq" ,
426
+ output_inferred_fusionsam = "temp/inferred_fusion.sam" ,
427
+ output_inferred_fusionlog = "temp/inferred_fusion.log" )
426
428
return_state = os .system (hisat2_verify_fusion_command )
427
429
if return_state == - 1 :
428
430
print ("[{now}] No fusion gene is detected!" .format (now = str_current_time ()))
429
431
exit (return_state )
430
432
print ("[{now}] Finish mapping reads to inferred fusion references!" .format (now = str_current_time ()))
431
433
432
434
# check the distribution near the fusion break point
433
- fusion_result = write_fusion (output_path + "temp/inferred_fusion.sam" )
435
+ fusion_result = write_fusion ("temp/inferred_fusion.sam" )
434
436
if fusion_result == - 1 :
435
437
exit (return_state )
436
438
437
439
# filter and get the circ-related reads
438
- transform_sam (output_path + "temp/inferred_fusion.sam" )
439
- sam_single_tofastq (output_path + "temp/inferred_fusion_tranformed.sam" , output_path + "temp/inferred_fusion_tranformed.fastq" )
440
- sam_single_tofastq (output_path + "temp/inferred_fusion_tranformed.sam" , output_path + "temp/inferred_fusion_tranformed.fastq" )
440
+ transform_sam ("temp/inferred_fusion.sam" )
441
+ sam_single_tofastq ("temp/inferred_fusion_tranformed.sam" , "temp/inferred_fusion_tranformed.fastq" )
442
+ sam_single_tofastq ("temp/inferred_fusion_tranformed.sam" , "temp/inferred_fusion_tranformed.fastq" )
441
443
442
444
hisat2_fusion_circ_command = '''hisat2 -p {thread} \
443
445
--no-softclip \
@@ -446,15 +448,15 @@ def main(argvs):
446
448
-U {inferred_fusion_circ_fastq} \
447
449
-S {output_inferred_fusion_circsam} \
448
450
2>{output_inferred_fusion_circlog}''' .format (thread = str (thread ),
449
- inferred_fusion_circ_fastq = output_path + "temp/inferred_fusion_tranformed.fastq" ,
450
- output_inferred_fusion_circsam = output_path + "temp/inferred_fusion_circ.sam" ,
451
- output_inferred_fusion_circlog = output_path + "temp/inferred_fusion_circ.log" )
451
+ inferred_fusion_circ_fastq = "temp/inferred_fusion_tranformed.fastq" ,
452
+ output_inferred_fusion_circsam = "temp/inferred_fusion_circ.sam" ,
453
+ output_inferred_fusion_circlog = "temp/inferred_fusion_circ.log" )
452
454
return_state = os .system (hisat2_fusion_circ_command )
453
455
if return_state != 0 :
454
456
exit (return_state )
455
457
# count input reads
456
- readcount = count_reads (output_path + "temp/trans_" + projectname + ".sam" )
457
- return_state = write_fcirc (output_path + "temp/inferred_fusion_circ.sam" , readcount , fusion_result )
458
+ readcount = count_reads ("temp/trans_" + projectname + ".sam" )
459
+ return_state = write_fcirc ("temp/inferred_fusion_circ.sam" , readcount , fusion_result )
458
460
if return_state == - 1 :
459
461
print ("[{now}] No fusion circRNA is detected!" .format (now = str_current_time ()))
460
462
exit (return_state )
0 commit comments