forked from gcyuan/diHMM-cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEmissions.cpp
96 lines (66 loc) · 2.58 KB
/
Emissions.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
//
// Created by Stephanos Tsoucas on 6/14/17.
//
#include "Emissions.h"
double
Emissions::emission_probability_from_index(const unsigned int bin_state,
const uword idx_of_observation,
const vec &observation) const {
double val = emissions_memo[idx_of_observation][bin_state];
if (val != -1) {
return val;
} else {
double ret = emission_probability_direct(bin_state, observation);
emissions_memo[idx_of_observation][bin_state] = ret;
return ret;
}
}
double Emissions::emission_probability(const unsigned int bin_state,
const vec &observation) const {
uword idx = vec_to_int(observation);
return emission_probability_from_index(bin_state, idx, observation);
}
colvec Emissions::emission_probabilities_col(const vec &observation) const {
uword idx = vec_to_int(observation);
colvec emissions(n_bin_states);
for (int i = 0; i < n_bin_states; i++) {
emissions(i) = emission_probability_from_index(i, idx, observation);
}
return emissions;
}
rowvec Emissions::emission_probabilities_row(const vec &observation) const {
uword idx = vec_to_int(observation);
rowvec emissions(n_bin_states);
for (int i = 0; i < n_bin_states; i++) {
emissions(i) = emission_probability_from_index(i, idx, observation);
}
return emissions;
}
// TODO update this separately for multiple chromosomes (?) may not need to,
// since initial model is gotten using concatenation of all bin data. Move this
// into emissions_dec
Emissions emissions_from_states(
const unsigned int n_bin_states, const int n_histone_marks,
const std::vector<unsigned int> &bins_to_bin_states, const mat &bin_data) {
std::set<std::vector<unsigned int>> observations_seen;
mat em = zeros(n_histone_marks, n_bin_states);
std::vector<double> state_counts(n_bin_states, 0);
for (int bin_position = 0; bin_position < bins_to_bin_states.size();
bin_position++) {
const int bin_state = bins_to_bin_states.at(bin_position);
state_counts.at(bin_state) += 1;
const vec &obs_vec = bin_data.col(bin_position);
std::vector<unsigned int> observation =
conv_to<std::vector<unsigned int>>::from(obs_vec);
observations_seen.insert(observation);
em.col(bin_state) += obs_vec;
}
for (int bin_state = 0; bin_state < n_bin_states; bin_state++) {
em.col(bin_state) /= state_counts.at(bin_state);
}
em.replace(NAN, 0.0);
const double emissions_kick = 0.1;
em *= (1 - emissions_kick);
em += emissions_kick;
return Emissions(em);
}