@@ -85,18 +85,23 @@ def runner(reference_index,
85
85
86
86
# Pre-parsing to find all the matching chromosomes between ref and vcf
87
87
_LOG .info ('Processing VCF file...' )
88
- matching_variants , matching_chromosomes = read_and_filter_variants (vcf_to_process ,
89
- reference_index ,
90
- ignore )
88
+ matching_variants , matching_chromosomes = read_and_filter_variants (
89
+ vcf_to_process ,
90
+ reference_index ,
91
+ ignore
92
+ )
91
93
92
94
if not matching_variants or not matching_chromosomes :
93
95
_LOG .error ("No valid variants detected. Check names in vcf versus reference and/or bed." )
94
96
sys .exit (1 )
95
97
96
- trinuc_ref_count , bed_track_length = count_trinucleotides (reference_index ,
97
- bed ,
98
- outcounts_file ,
99
- matching_chromosomes )
98
+ trinuc_ref_count , bed_track_length = count_trinucleotides (
99
+ reference_index ,
100
+ bed ,
101
+ outcounts_file ,
102
+ matching_chromosomes ,
103
+ save_trinuc
104
+ )
100
105
101
106
if not trinuc_ref_count :
102
107
_LOG .error ("No valid trinucleotides detected in reference." )
@@ -296,47 +301,6 @@ def runner(reference_index,
296
301
for k in sorted (snp_trans_freq ):
297
302
_LOG .info (f'p({ k [0 ]} --> { k [1 ]} | SNP occurs) = { snp_trans_freq [k ]} ' )
298
303
299
- # Save counts, if requested
300
- if save_trinuc :
301
- trinuc_output_data = {
302
- 'p(snp)' : average_snp_freq ,
303
- 'p(insertion)' : average_insertion_frequency ,
304
- 'p(deletion)' : average_deletion_frequency ,
305
- 'overall average mut rate' : avg_mut_rate ,
306
- 'total variants processed' : total_var ,
307
- 'homozygous probability' : homozygous_frequency
308
- }
309
-
310
- for k in sorted (trinuc_mut_prob ):
311
- trinuc_output_data [f'p({ k } mutates)' ] = trinuc_mut_prob [k ]
312
-
313
- for k in sorted (trinuc_trans_probs ):
314
- trinuc_output_data [f'p({ k [0 ]} --> { k [1 ]} | { k [0 ]} mutates)' ] = trinuc_trans_probs [k ]
315
-
316
- for k in sorted (deletion_counts ):
317
- trinuc_output_data [f'p(del length = { abs (k )} | deletion occurs)' ] = deletion_frequency [k ]
318
-
319
- for k in sorted (insertion_counts ):
320
- trinuc_output_data [f'p(insert length = { abs (k )} | insertion occurs)' ] = insertion_freqency [k ]
321
-
322
- for k in sorted (snp_trans_freq ):
323
- trinuc_output_data [f'p({ k [0 ]} --> { k [1 ]} | SNP occurs)' ] = snp_trans_freq [k ]
324
-
325
- trinuc_output_path = Path (output )
326
- trinuc_output_path = trinuc_output_path .with_suffix ('' )
327
- trinuc_output_path = trinuc_output_path .with_suffix ('.trinuc.pickle.gz' )
328
- _LOG .info (f'Saving trinucleotide counts to: { trinuc_output_path } ' )
329
-
330
- with open_output (trinuc_output_path , 'w+' ) as trinuc_outfile :
331
-
332
- pickle .dump (trinuc_output_data , trinuc_outfile )
333
-
334
- # Human-readable content of file below
335
- trinuc_outfile .write ('\n ' )
336
-
337
- for key , value in trinuc_output_data .items ():
338
- trinuc_outfile .write (f'{ key } : { value } \n ' )
339
-
340
304
_LOG .info (f'p(snp) = { average_snp_freq } ' )
341
305
_LOG .info (f'p(insertion) = { average_insertion_frequency } ' )
342
306
_LOG .info (f'p(deletion) = { average_deletion_frequency } ' )
@@ -405,6 +369,8 @@ def compute_mut_runner(reference,
405
369
if outcounts :
406
370
validate_input_path (outcounts )
407
371
outcounts = Path (outcounts )
372
+ elif save_trinuc :
373
+ outcounts = Path (output + '.trinuc.pickle.gz' )
408
374
409
375
print ('Processing reference...' )
410
376
reference_index = SeqIO .index (reference , 'fasta' )
@@ -477,8 +443,18 @@ def compute_mut_runner(reference,
477
443
output = Path (output + '.pickle.gz' )
478
444
validate_output_path (output , overwrite = overwrite_output )
479
445
480
- runner (reference_index , vcf_to_process , vcf_columns , outcounts , show_trinuc , save_trinuc ,
481
- output , bed , human_sample , skip_common )
446
+ runner (
447
+ reference_index ,
448
+ vcf_to_process ,
449
+ vcf_columns ,
450
+ outcounts ,
451
+ show_trinuc ,
452
+ save_trinuc ,
453
+ output ,
454
+ bed ,
455
+ human_sample ,
456
+ skip_common
457
+ )
482
458
483
459
if os .path .exists ('temp.vcf' ):
484
460
os .remove ('temp.vcf' )
0 commit comments