forked from MikeAxtell/CleaveLand4
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfragment-abundance.py
executable file
·74 lines (55 loc) · 2.36 KB
/
fragment-abundance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python2.7
# Created by: Bruno Costa
# ITQB 2016
#
# Adds the degradome fragment count to the CleaveLand4 output TSV based
# on the degradome density file.
# fragment-abundance.py -h for help
#
# NOTE(review): code below is Python 2/3 compatible (the original relied on
# the Python-2-only builtin `reduce`); the shebang is kept for backward
# compatibility with the original deployment.
import argparse


def parse_density(density_path):
    """Index a degradome density file into a nested dictionary.

    The density file contains one "@ID:<transcript>" header per transcript,
    an optional "@LN:<length>" metadata line, and tab-separated data rows
    with exactly three columns: <position>, <count>, <extra>.

    Returns {transcript_id: {position: [count, extra]}} (all values are
    kept as strings, exactly as read from the file).
    """
    # Based on a script provided by FPMartins to index fasta into a dictionary.
    index = {}
    current_id = None
    with open(density_path, "r") as density:
        for raw_line in density:
            if raw_line.startswith('@ID'):
                current_id = raw_line.strip().split(":", 1)[1]
                index[current_id] = {}
            elif raw_line.startswith("@LN"):
                # Transcript length metadata; not needed for the lookup.
                continue
            else:
                fields = raw_line.strip().split("\t")
                # Only well-formed 3-column data rows are indexed; a data
                # row before the first @ID header is ignored rather than
                # crashing on an undefined transcript id.
                if len(fields) == 3 and current_id is not None:
                    index[current_id][fields[0]] = fields[1:]
    return index


def annotate_targets(rows, density_index):
    """Append the fragment count to each CleaveLand4 result row.

    rows: tab-split rows (header row first).  Column 2 of each data row
    holds the transcript id and column 5 the slice-site position, which
    together address the count in density_index.

    Returns (header, annotated_rows).  Raises KeyError if a row's
    transcript/slice site is absent from the density index (same behavior
    as the original script).
    """
    header = rows[0]
    annotated = []
    # Skip the header line.
    for row in rows[1:]:
        # density_index[transcript][slice_site] -> [count, extra]
        row.append(density_index[row[2]][row[5]][0])
        annotated.append(row)
    return header, annotated


def main():
    """Parse the CLI, join density counts onto the targets, write the TSV."""
    parser = argparse.ArgumentParser(description='This program add the fragment count to the cleaveland 4 output tsv based on the degradome density file')
    parser.add_argument('--targets', type=str, nargs=1, metavar='Targets file', dest='targets', required=True, help='Path to the file outputed by cleaveland 4 with headers and metadata')
    parser.add_argument('--output', type=str, nargs=1, metavar='Output file', dest='output', required=True, help='The output file name and destination. ex: /home/user/outputfile.tsv')
    parser.add_argument('--density', type=str, nargs=1, metavar='Degradome Density file', dest='density', required=True, help='The path to the degradome density file')
    args = parser.parse_args()

    density_index = parse_density(args.density[0])

    # Parse the target file, excluding the metadata lines at the beginning.
    with open(args.targets[0], "r") as targets:
        rows = [line.strip().split("\t") for line in targets if not line.startswith("#")]

    header, annotated = annotate_targets(rows, density_index)

    # Output to file; "\t".join replaces the Python-2-only reduce(...) idiom.
    with open(args.output[0], "w") as writer:
        writer.write("\t".join(header) + "\tFrag #\n")
        for row in annotated:
            writer.write("\t".join(row) + "\n")


if __name__ == "__main__":
    main()