Skip to content

Commit c58632c

Browse files
committed
upload scripts
1 parent 39ed8e4 commit c58632c

File tree

2 files changed

+102
-0
lines changed

2 files changed

+102
-0
lines changed

annotation.py

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import numpy as np
2+
import sys
3+
import dihmm_ext
4+
import bed_writer
5+
import os
6+
from os import listdir,mkdir
7+
from os.path import isfile,join,exists
8+
from optparse import OptionParser
9+
import pandas as pd
10+
11+
def printlist(path, value, sep):
    """Write the rows of ``value`` to a text file, one row per line.

    Args:
        path: Destination file path. It is opened in write mode, so an
            existing file is overwritten.
        value: Iterable of rows; each row is an iterable of items.
        sep: Separator string placed between the items of a row.

    Every item is converted with ``str`` before joining, so rows of floats,
    ints, strings or mixed types are all written the same way, and an empty
    row produces an empty line.
    """
    # The context manager closes the file even if formatting a row fails.
    with open(path, 'w') as fid:
        for row in value:
            fid.write("%s\n" % sep.join(str(item) for item in row))
24+
25+
def main(argv):
    """Annotate the signal files of one sample with a trained diHMM model.

    Args:
        argv: Full argument vector including the program name
            (``sys.argv``). All five options (``--table``, ``--name``,
            ``--indata``, ``--model``, ``--outfolder``) are required; when
            any is missing the help text is printed and the process exits
            with status 1.

    The first column of the tab-separated ``--table`` file lists samples that
    are already annotated. If ``--name`` appears there, a message is printed
    and nothing else happens. Otherwise the model is loaded from ``--model``
    and every entry of the ``--indata`` directory is annotated. Entry names
    are split on ``_``: the first field is used as the cell line and the
    second as the chromosome. Results go to ``<outfolder>/<cellline>/`` as
    BED files plus ``<chrom>_bin_domain_states.txt``.
    """
    parser = OptionParser()
    parser.add_option("-t", "--table", action="store", type="string", dest="table", metavar="<file>", help="the file for currently annotated samples")
    parser.add_option("-n", "--name", action="store", type="string", dest="name", metavar="<file>", help="the name of the annotation pair to work on")
    parser.add_option("-i", "--indata", action="store", type="string", dest="indata", metavar="<file>", help="the path for prepared signal matrix in the target cell type")
    parser.add_option("-m", "--model", action="store", type="string", dest="model", metavar="<file>", help="the path for trained model")
    parser.add_option("-o", "--outfolder", action="store", type="string", dest="outfolder", metavar="<file>", help="the path for output folder to store the annotated chroms")

    (opt, args) = parser.parse_args(argv)
    # Validate the parsed options rather than counting argv entries: a raw
    # count lets a missing option through and rejects "--opt=value" forms.
    required = (opt.table, opt.name, opt.indata, opt.model, opt.outfolder)
    if any(option is None for option in required):
        parser.print_help()
        sys.exit(1)

    domain_size = 20
    # os.path.join keeps the paths valid whether or not --indata ends in '/'.
    signal_files = [os.path.join(opt.indata, entry) for entry in os.listdir(opt.indata)]

    cdf = pd.read_csv(opt.table, sep='\t', header=None)
    currentAnnotations = list(cdf[0])
    name = opt.name
    if name in currentAnnotations:
        print(opt.name+" has already exsited...skip...")
        return

    print("working on "+name+"...")
    # The folder may be left over from an interrupted run, or its parent may
    # not exist yet; neither should abort the annotation.
    if not os.path.exists(opt.outfolder):
        os.makedirs(opt.outfolder)
    x = dihmm_ext.load_model(opt.model+'/', domain_size)

    output = opt.outfolder+'/'
    for signal_file in signal_files:
        tmp = os.path.basename(signal_file).split('_')
        cellline = tmp[0]
        chrom = tmp[1]
        noutput = output+cellline+'/'
        if not os.path.exists(noutput):
            os.mkdir(noutput)
        a = dihmm_ext.annotate(x, [signal_file])
        b = bed_writer.BedWriter(a[0], x)
        b.write_bed_files(noutput, cellline, chrom)
        anno = a[0].annotations
        printlist(noutput+chrom+'_bin_domain_states.txt', anno.tolist(), '\t')
75+
# Run the command-line entry point only when this file is executed as a
# script; main() receives the full argument vector, program name included.
if __name__ == "__main__":
    main(sys.argv)

train.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import numpy as np
2+
import dihmm_ext
3+
import bed_writer
4+
import os
5+
from os import listdir,mkdir
6+
from os.path import isfile,join,exists
7+
8+
# Training settings passed to dihmm_ext.run_dihmm below.
n_bin_states = 30
n_domain_states = 30
domain_size = 20
tolerance = 1e-6
max_iter = 500

# Directory the trained model is saved to, and directory holding the
# training input files.
output = '/data/sysdir/home/yk890/diHMM/diHMM/chr17_3cells/model'
path = '/data/sysdir/home/yk890/diHMM/diHMM/chr17_3cells/data'

tpath = path+'/'
# Every entry of the data directory is handed to the trainer.
trainfile = [tpath+f for f in listdir(tpath)]

toutput = output
# Create the model directory (including missing parents) before training, so
# an unusable output path fails immediately instead of after the whole run.
if not os.path.exists(toutput):
    os.makedirs(toutput)

x = dihmm_ext.run_dihmm(n_bin_states, n_domain_states, domain_size, max_iter, tolerance, trainfile)
dihmm_ext.save_model(x, toutput)
25+
26+

0 commit comments

Comments
 (0)