-
Notifications
You must be signed in to change notification settings - Fork 7
/
demo_segm_HMM.m
376 lines (316 loc) · 15.4 KB
/
demo_segm_HMM.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Main demo scripts for the ICSC-HMM Segmentation Algorithm proposed in:
%
% N. Figueroa and A. Billard, “Transform-Invariant Non-Parametric Clustering
% of Covariance Matrices and its Application to Unsupervised Joint Segmentation
% and Action Discovery”
% Arxiv, 2017.
%
% Author: Nadia Figueroa, PhD Student., Robotics
% Learning Algorithms and Systems Lab, EPFL (Switzerland)
% Email address: [email protected]
% Website: http://lasa.epfl.ch
% November 2016; Last revision: 25-May-2017
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% --Select a Dataset to Test-- %%
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 1) Toy 2D dataset, 3 Unique Emission models, 3 time-series, same swicthing
clc; clear all; close all;
N_TS = 3; display = 2 ; % 0: no-display, 1: raw data in one plot, 2: ts w/labels
[~, Data, True_states] = genToyHMMData_Gaussian( N_TS, display );
label_range = unique(True_states{1});
%% 2a) Toy 2D dataset, 4 Unique Emission models, max 5 time-series
clc; clear all; close all;
M = 4; % Number of Time-Series
[~, ~, Data, True_states] = genToySeqData_Gaussian( 4, 2, M, 500, 0.5 );
%% 2b) Toy 2D dataset, 2 Unique Emission models transformed, max 4 time-series
clc; clear all; close all;
M = 3; % Number of Time-Series
[~, TruePsi, Data, True_states] = genToySeqData_TR_Gaussian(4, 2, M, 500, 0.5 );
dataset_name = '2D Transformed';
% Similarity matrix S (4 x 4 matrix)
if exist('h1','var') && isvalid(h1), delete(h1);end
h1 = plotSimMat( TruePsi.S );
%% 5) Real 'Peeling' (max) 32-D dataset, 5 Unique Emission models, 3 time-series
% Demonstration of a Bimanual Peeling Task consisting of
% 3 (32-d) time-series X = {x_1,..,x_T} with variable length T.
%
% Dimensions:
% x_a = {pos_x, pos_y, pos_z, q_i, q_j, q_k, q_w, f_x, f_y, f_z, tau_x, tau_y, tau_z}
% - positions: Data{i}(1:3,:) (3-d: x, y, z)
% - orientations: Data{i}(4:7,:) (4-d: q_i, q_j, q_k, q_w)
% - forces: Data{i}(8:10,:) (3-d: f_x, f_y, f_z)
% - torques: Data{i}(11:13,:) (3-d: tau_x, tau_y, tau_z)
% x_p = {pos_x, pos_y, pos_z, q_i, q_j, q_k, q_w, f_x, f_y, f_z, tau_x, tau_y, tau_z}
% - same as above Data{i}(14:26,:)
% x_o = {mu_r, mu_g, mu_b, sigma_r, sigma_g, sigma_b}
% - rate_mean: Data{i}(27:29,:) (3-d: mu_r, mu_g, mu_b)
% - rate_variance: Data{i}(30:32,:) (3-d: sigma_r, sigma_g, sigma_b)
% Dimension type:
% dim: 'all', include all 32 dimensions (active + passive robots + object)
% dim: 'robots', include only 26-d from measurements from active + passive robots
% dim: 'act+obj', include only 19-d from measurements from active robot + object
% dim: 'active', include only 13-d from measurements from active robot
% Dataset type:
% sub-sampled to 100 Hz (from 500 Hz), smoothed f/t trajectories, fixed rotation
% discontinuities.
% clc;
clear all; close all
data_path = './test-data/'; display = 1;
% Type of data processing
% O: no data manipulation -- 1: zero-mean -- 2: scaled by range * weights
normalize = 2;
% Select dimensions to use
dim = 'robots';
% Define weights for dimensionality scaling
switch dim
case 'active'
weights = [5*ones(1,3) 2*ones(1,4) 1/10*ones(1,6)]'; % active
case 'act+obj'
weights = [2*ones(1,7) 1/10*ones(1,6) 2*ones(1,6)]'; % act+obj
case 'robots'
weights = [5*ones(1,3) ones(1,4) 1/10*ones(1,6) ones(1,3) 2*ones(1,4) 1/20*ones(1,6) ]'; % robots(velocities)
% weights = [5*ones(1,3) 1/2*ones(1,4) 1/10*ones(1,6) ones(1,3) 1/2*ones(1,4) 1/20*ones(1,6) ]'; % robots(position)
case 'all'
weights = [5*ones(1,3) ones(1,4) 1/10*ones(1,6) ones(1,3) 2*ones(1,4) 1/20*ones(1,6) 2*ones(1,6) ]'; % all
end
% Define if using first derivative of pos/orient
use_vel = 0;
[data, TruePsi, Data, True_states, Data_] = load_peeling_dataset( data_path, dim, display, normalize, weights, use_vel);
dataset_name = 'Peeling';
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Step 1:Load One of the Wiping Datasets %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clc; clear all; close all;
data_path = '/home/nbfigueroa/Dropbox/PhD_papers/journal-draft/new-code/ICSC-HMM/test-data/';
%%%%%%%%%%% Input Data loading function %%%%%%%%%%
% Select Dataset to Load
dataset_type = 1; % 1: Rim Cover, 2: Door Fender
% Select which Variables to use
variables = 1;
do_plots = 1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[data, TruePsi, Data, True_states, ...
data_legends, dataset_name, plot_subsample, dataset_view] = load_wiping_dataset(data_path, dataset_type, do_plots, variables);
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Step 1: Run E-M Model Selection for HMM with 10 runs in a range of K %%
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Model Selection for HMM
K_range = [1:15]; repeats = 5;
K_opt = hmm_eval(Data, K_range, repeats);
% Fit HMM with 'optimal' K and Apply Viterbi for Segmentation
% Set "Optimal " GMM Hyper-parameters
K = K_opt; T = 5;
ts = [1:length(Data)];
% Segmentation Metric Arrays
hamming_distance = zeros(1,T);
global_consistency = zeros(1,T);
variation_info = zeros(1,T);
% Clustering Metric Arrays
cluster_purity = zeros(1,T);
cluster_NMI = zeros(1,T);
cluster_F = zeros(1,T);
% Model Metric Arrays
logliks = zeros(length(ts),T);
label_range = [1:K];
est_states = [];
for run=1:T
% Fit HMM with 'optimal' K to set of time-series
[p_start, A, phi, loglik] = ChmmGauss(Data, K);
true_states_all = [];
est_states_all = [];
% Calculate p(X) & viterbi decode fo reach time-series
if run==T
% if exist('h0','var') && isvalid(h0), delete(h0);end
h0 = figure('Color',[1 1 1]);
end
for i=1:length(ts)
X = Data{ts(i)};
if isfield(TruePsi, 'sTrueAll')
true_states = TruePsi.s{ts(i)}';
else
true_states = True_states{ts(i)};
end
logp_xn_given_zn = Gauss_logp_xn_given_zn(Data{ts(i)}, phi);
[~,~, logliks(i,run)] = LogForwardBackward(logp_xn_given_zn, p_start, A);
est_states_ = LogViterbiDecode(logp_xn_given_zn, p_start, A);
% Stack labels for state clustering metrics
true_states_all = [true_states_all; true_states];
est_states_all = [est_states_all; est_states_];
% Plot segmentation Results on each time-series
if run==T
subplot(length(ts),1,i);
data_labeled = [X est_states_]';
plotLabeledData( data_labeled, [], strcat('HMM Segmented Time-Series (', num2str(ts(i)),'), K:',num2str(K),', loglik:',num2str(loglik)), [],label_range)
est_states{i} = est_states_;
end
end
% Segmentation Metrics per run
[relabeled_est_states_all, hamming_distance(run),~,~] = mapSequence2Truth(true_states_all,est_states_all);
[~,global_consistency(run), variation_info(run)] = compare_segmentations(true_states_all,est_states_all);
% Cluster Metrics per run
[cluster_purity(run) cluster_NMI(run) cluster_F(run)] = cluster_metrics(true_states_all, est_states_all);
end
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Extract Estimation Statistics %%
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Overall Stats for HMM segmentation and state clustering
clc;
fprintf('*** Hidden Markov Model Results*** \n Optimal States: %d \n Hamming-Distance: %3.3f (%3.3f) GCE: %3.3f (%3.3f) VO: %3.3f (%3.3f) \n Purity: %3.3f (%3.3f) NMI: %3.3f (%3.3f) F: %3.3f (%3.3f) \n',[K mean(hamming_distance) std(hamming_distance) ...
mean(global_consistency) std(global_consistency) mean(variation_info) std(variation_info) mean(cluster_purity) std(cluster_purity) mean(cluster_NMI) std(cluster_NMI) mean(cluster_F) std(cluster_F)])
% Visualize Transition Matrix and Segmentation from 'Best' Run
if exist('h1','var') && isvalid(h1), delete(h1);end
h1 = plotTransMatrix(A);
% Extract HMM Model parameters and Plot if 2D
% Extract params
HMM_theta = [];
HMM_theta.Mu = phi.mu;
HMM_theta.Sigma = phi.Sigma;
HMM_theta.K = K;
% Plot 2D HMM params
if size(Data{1},2) == 2
title_name = 'Estimated Emission Parameters';
plot_labels = {'$x_1$','$x_2$'};
if exist('h2','var') && isvalid(h2), delete(h2);end
h2 = plotGaussianEmissions2D(HMM_theta, plot_labels, title_name);
end
N = size(Data{1},2);
if N > 2
% Plot Ground Segmentations
if exist('h5','var') && isvalid(h5), delete(h5);end
title_name = {dataset_name,'Ground Truth'};
title_edge = [0 0 0];
h5 = plot3D_labeledTrajectories(Data,True_states,plot_subsample,title_name, title_edge, dataset_view);
% Plot Estimated Segmentations
if exist('h6','var') && isvalid(h6), delete(h6);end
title_name = {dataset_name,'HMM (EM) Segmentation'};
title_edge = [0.470588235294118 0.670588235294118 0.188235294117647];
h6 = plot3D_labeledTrajectories(Data,est_states,plot_subsample,title_name, title_edge, dataset_view);
end
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Step 5: Extract Super States for Geometric Invariant Segmentation %%
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%% Extract Sigmas %%%%%%%%%%%%
sigmas = [];
for k=1:HMM_theta.K
sigmas{k} = HMM_theta.Sigma(:,:,k);
end
true_labels = [1:HMM_theta.K];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%% Compute Similarity Matrix from B-SPCM Function for dataset %%%%%%
% %%%%%%%%%%%%%%%%%%%%%%%%% Set Hyper-parameter %%%%%%%%%%%%%%%%%%%%%%%%%%%
% Tolerance for SPCM decay function
dis_type = 2;
gamma = 3;
spcm = ComputeSPCMfunctionMatrix(sigmas, gamma, dis_type);
D = spcm(:,:,1);
S = spcm(:,:,2);
%%%%%%% Visualize Bounded Similarity Confusion Matrix %%%%%%%%%%%%%%
if exist('h0','var') && isvalid(h0), delete(h0); end
title_str = 'SPCM Similarity Matrix';
h0 = plotSimilarityConfMatrix(S, title_str);
if exist('h1','var') && isvalid(h1), delete(h1); end
title_str = 'SPCM Dis-similarity Matrix';
h1 = plotSimilarityConfMatrix(D, title_str);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%% Embed SDP Matrices in Approximate Euclidean Space %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Choose Embedding implementation
show_plots = 0; % Show plot of similarity matrices+eigenvalues
pow_eigen = 4; % (L^+)^(pow_eigen) for dimensionality selection
[x_emb, Y, d_L_pow] = graphEuclidean_Embedding(S, show_plots, pow_eigen);
emb_name = '(SPCM) Graph-Subspace Projection';
M = size(Y,1);
%%%%%%%% Visualize Approximate Euclidean Embedding %%%%%%%%
plot_options = [];
plot_options.labels = true_labels;
plot_options.title = emb_name;
ml_plot_data(Y',plot_options);
axis equal;
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Step 4: Discover Clusters of Covariance Matrices %%
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Discover Clusters with GMM-based Clustering Variants on Embedding %%
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 0: sim-CRP-MM (Collapsed Gibbs Sampler) on Preferred Embedding
% 1: GMM-EM Model Selection via BIC on Preferred Embedding
% 2: CRP-GMM (Gibbs Sampler/Collapsed) on Preferred Embedding
est_options = [];
est_options.type = 0; % Clustering Estimation Algorithm Type
% If algo 1 selected:
est_options.maxK = 15; % Maximum Gaussians for Type 1
est_options.fixed_K = []; % Fix K and estimate with EM for Type 1
% If algo 0 or 2 selected:
est_options.samplerIter = 250; % Maximum Sampler Iterations
% For type 0: 50-200 iter are needed
% For type 2: 200-1000 iter are needed
% Plotting options
est_options.do_plots = 1; % Plot Estimation Stats
est_options.dataset_name = dataset_name; % Dataset name
est_options.true_labels = true_labels; % To plot against estimates
% Fit GMM to Trajectory Data
tic;
clear Priors Mu Sigma
[Priors, Mu, Sigma, est_labels, stats] = fitgmm_sdp(S, Y, est_options);
toc;
%%%%%%%%%% Compute Cluster Metrics %%%%%%%%%%%%%
[Purity, NMI, F, ARI] = cluster_metrics(true_labels, est_labels');
if exist('true_labels', 'var')
K = length(unique(true_labels));
end
switch est_options.type
case 0
fprintf('---%s Results---\n Iter:%d, LP: %d, Clusters: %d/%d with Purity: %1.2f, NMI Score: %1.2f, ARI: %1.2f, F measure: %1.2f \n', ...
'spcm-CRP-MM (Collapsed-Gibbs)', stats.Psi.Maxiter, stats.Psi.MaxLogProb, length(unique(est_labels)), K, Purity, NMI, ARI, F);
case 1
fprintf('---%s Results---\n Clusters: %d/%d with Purity: %1.2f, NMI Score: %1.2f, ARI: %1.2f, F measure: %1.2f \n', ...
'Finite-GMM (MS-BIC)', length(unique(est_labels)), K, Purity, NMI, ARI, F);
case 2
if isfield(stats,'collapsed')
fprintf('---%s Results---\n Clusters: %d/%d with Purity: %1.2f, NMI Score: %1.2f, ARI: %1.2f, F measure: %1.2f \n', ...
'CRP-GMM (Collapsed-Gibbs)', length(unique(est_labels)), K, Purity, NMI, ARI, F);
else
fprintf('---%s Results---\n Clusters: %d/%d with Purity: %1.2f, NMI Score: %1.2f, ARI: %1.2f, F measure: %1.2f \n', ...
'CRP-GMM (Gibbs)', length(unique(est_labels)), K, Purity, NMI, ARI, F);
end
end
%% Visualize Estimated Parameters
re_estimate = 1;
if M < 4
est_options.emb_name = emb_name;
if re_estimate
[Priors0, Mu0, Sigma0] = gmmOracle(Y, est_labels);
tot_dilation_factor = 1; rel_dilation_fact = 0.2;
Sigma0 = adjust_Covariances(Priors0, Sigma0, tot_dilation_factor, rel_dilation_fact);
[~, est_labels0] = my_gmm_cluster(Y, Priors0, Mu0, Sigma0, 'hard', []);
[h_gmm, h_pdf] = visualizeEstimatedGMM(Y, Priors0, Mu0, Sigma0, est_labels0, est_options);
else
[h_gmm, h_pdf] = visualizeEstimatedGMM(Y, Priors, Mu, Sigma, est_labels, est_options);
end
end
%% Create super states from HMM emission model grouping
est_super_states = [];
est_super_states_3d = [];
for i=1:length(est_states)
est_states_i = est_states{i};
est_super_states_i = [];
for j=1:length(est_states_i)
est_super_states_i(j) = est_labels(est_states_i(j));
end
est_super_states{i} = est_super_states_i;
est_super_states_3d{i} = est_super_states_i';
end
super_labels = unique(est_labels);
% Plot Segmentated 3D Trajectories
N = size(Data{1},2);
if N > 2
% Plot Estimated Segmentations
if exist('h8','var') && isvalid(h8), delete(h8);end
title_name = {strcat(dataset_name,''),' HMM (EM) + SPCM-$\mathcal{CRP}$-GMM'};
title_edge = [1 0 0];
h8 = plot3D_labeledTrajectories(Data,est_super_states_3d,plot_subsample,title_name, title_edge, dataset_view);
end