-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcfm.cpp
467 lines (401 loc) · 15.7 KB
/
cfm.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
#include "cfm.h"
using std::cout;
using std::endl;
// convert a WxHxC image to a (WH)xC 2D matrix
Matrix img2eigen(const Image &im)
{
int w = im.width();
int h = im.height();
int c = im.channels();
Matrix I(w*h, c);
for(int x=0; x<w; x++) {
for(int y=0; y<h; y++) {
for(int z=0; z<c; z++) {
I(x + y*w, z) = im(x, y, z);
}
}
}
return I;
}
// return a (window area)x(channels) matrix around pixel i,j of a matrix representing
// an image of width
// note: function does not check bounds
Matrix get_window(const Matrix &I, int i, int j, int radius, int image_width)
{
int window_size = 2*radius + 1;
int window_area = window_size * window_size;
int channels = I.cols();
Matrix W(window_area, channels);
int current_row = 0;
for(int dj=-radius; dj<=radius; dj++) {
for(int di=-radius; di<=radius; di++) {
for(int c=0; c<channels; c++) {
W(current_row, c) = I((i+di) + (j+dj)*image_width, c);
}
current_row++;
}
}
return W;
}
// compute average for each channel, returning a 1xC matrix
Matrix mu_window(const Matrix &W)
{
Matrix sums = W.colwise().sum();
return sums / W.rows();
}
// subtract mu from each row of W
Matrix window_minus_mu(const Matrix &W, const Matrix &mu)
{
// convert mu to a Vector
Vector mu_v(mu.cols());
for(int c=0; c<mu.cols(); c++) {
mu_v(c) = mu(0, c);
}
// convert mu_v to a column vector and
// subtract it from each row of W
return W.rowwise() - mu_v.transpose();
}
// compute CxC covariance matrix
Matrix sigma_window(const Matrix &W, const Matrix &mu)
{
Matrix W_minus_mu = window_minus_mu(W, mu);
Matrix sigma = W_minus_mu.transpose() * W_minus_mu;
sigma = sigma / (sigma.rows() * sigma.cols());
return sigma;
}
// compute the matting laplacian matrix given image and mask of known pixels
SparseMatrix matting_laplacian(const Image &im, const Image &known, float epsilon, int radius)
{
if(im.width() != known.width() || im.height() != known.height()) {
throw InvalidArgument();
}
int width = im.width();
int height = im.height();
int N = width*height; // number of pixels
int window_size = 2*radius + 1;
int window_area = window_size * window_size;
int channels = im.channels();
// list of sparse matrix entries, where duplicates will be automatically added together
std::vector<Triplet> triplets;
// preallocate enough memory
triplets.reserve(N*window_area);
// create matrix from image
Matrix I = img2eigen(im);
// loop over all possible window centers
for(int wy=radius; wy<height-radius; wy++) {
for(int wx=radius; wx<width-radius; wx++) {
// discard windows that don't contain at least one unknown pixel
bool has_unknown = false;
for(int dy=-radius; dy<=radius; dy++)
for(int dx=-radius; dx<=radius; dx++)
if(known(wx+dx, wy+dy) < epsilon)
has_unknown = true;
if(!has_unknown) continue;
// get pixels in window
Matrix W = get_window(I, wx, wy, radius, width);
// compute mean and covariance
Matrix mu = mu_window(W);
Matrix sigma = sigma_window(W, mu);
// compute window minus average
// this is the equivalent of (I-mu) in equation 12 of the paper
Matrix W_minus_mu = window_minus_mu(W, mu);
// compute the inverted term in equation 12 of the paper
// (sigma + (epsilon/|w|) I3)^-1
Matrix inv_term = (sigma + (epsilon/(float)window_area) * Matrix::Identity(channels, channels)).inverse();
// compute the matrix product in equation 12 of the paper
// (I-mu) * inv_term * (I-mu)
Matrix mat_product = W_minus_mu * inv_term * W_minus_mu.transpose();
// compute the entire expression inside the summation
// the actual summation is done automatically through
// duplicate entries in the sparse triplet list
Matrix sum_expr = Matrix::Identity(window_area, window_area) - (1.0f/(float)window_area) * (Matrix::Ones(window_area, window_area) + mat_product);
// add triplets to list
// there is an entry in sum_expr for each pair of possible offsets from the window center
for(int dy=-radius; dy<=radius; dy++) {
for(int dx=-radius; dx<=radius; dx++) {
for(int dy2=-radius; dy2<=radius; dy2++) {
for(int dx2=-radius; dx2<=radius; dx2++) {
// output row and column in the matting laplacian
int L_row = (wx+dx) + (wy+dy) *width;
int L_col = (wx+dx2) + (wy+dy2)*width;
// input row and column in sum_expr
int s_row = (dx+radius) + (dy+radius) *window_size;
int s_col = (dx2+radius) + (dy2+radius)*window_size;
// triplet
triplets.emplace_back(L_row, L_col, sum_expr(s_row, s_col));
}
}
}
}
}
}
// create the sparse matrix
SparseMatrix L(N, N);
L.setFromTriplets(triplets.begin(), triplets.end());
return L;
}
// compute matte given image and trimap
Image cfm_with_trimap(const Image &im, const Image &trimap, float lambda, float epsilon, int radius)
{
if(im.width() != trimap.width() || im.height() != trimap.height())
throw InvalidArgument();
int width = im.width();
int height = im.height();
int N = width*height; // number of pixels
// convert trimap to known pixel mask
Image known(width, height, 1);
for(int y=0; y<height; y++) {
for(int x=0; x<width; x++) {
if(trimap(x, y) < epsilon || trimap(x, y) >= 1-epsilon) {
known(x, y) = 1.0f;
} else {
known(x, y) = 0.0f;
}
}
}
//cout << "computing laplacian" << endl;
// compute sparse matting laplacian matrix
SparseMatrix L = matting_laplacian(im, known, epsilon, radius);
// compute bS in equation 14 of the paper
//cout << "computing linear system" << endl;
Matrix known_mat = img2eigen(known);
Matrix trimap_mat = img2eigen(trimap);
Vector bS = known_mat.cwiseProduct(trimap_mat).col(0);
// compute lambda*DS in equation 14 of the paper
// as a sparse matrix
Vector lambda_DS = (lambda*known_mat).col(0);
std::vector<Triplet> triplets;
for(int i=0; i<lambda_DS.size(); i++) {
triplets.emplace_back(i, i, lambda_DS(i));
}
SparseMatrix lambda_DS_sparse(N, N);
lambda_DS_sparse.setFromTriplets(triplets.begin(), triplets.end());
// create the linear system lhs*alpha = rhs
SparseMatrix lhs = L + lambda_DS_sparse;
lhs.makeCompressed();
Vector rhs = lambda*bS;
// solve the sparse linear system
cout << "solving linear system of size " << lhs.nonZeros() << endl;
//Eigen::BiCGSTAB<SparseMatrix, Eigen::IncompleteLUT<float>> solver;
//Eigen::ConjugateGradient<SparseMatrix, Eigen::Lower | Eigen::Upper> solver;
Eigen::SparseLU<SparseMatrix> solver;
//solver.setMaxIterations(5000);
solver.compute(lhs);
Vector alpha = solver.solve(rhs);
//Vector alpha = solver.solveWithGuess(rhs, trimap_mat.col(0));
// clamp values
//cout << "creating matte" << endl;
alpha = alpha.array().min(1.0f).max(0.0f);
// create output
Image matte(width, height, 1);
for(int y=0; y<height; y++) {
for(int x=0; x<width; x++) {
matte(x, y) = alpha(x + y*width);
}
}
return matte;
}
// convert a set of scribbles to a trimap
Image scribble2trimap(const Image &im, const Image &scribble)
{
static const float eps = 0.1;
Image difference = im - scribble;
Image trimap(im.width(), im.height(), 1);
for(int y=0; y<im.height(); y++) {
for(int x=0; x<im.width(); x++) {
float sum_scribble = 0.0f;
float sum_difference = 0.0f;
for(int c=0; c<im.channels(); c++) {
sum_scribble += scribble(x, y, c);
sum_difference += abs(difference(x, y, c));
}
// if no difference between original and scribble, it's not scribbled over
if(sum_difference < 1e-6) {
trimap(x, y) = 0.5f;
continue;
}
// if all channels are near zero, it's a black scribble
if(sum_scribble < eps)
trimap(x, y) = 0.0f;
// if all channels are near one, it's a white scribble
else if(sum_scribble > im.channels()-eps)
trimap(x, y) = 1.0f;
// otherwise it's not a scribble
else
trimap(x, y) = 0.5f;
}
}
return trimap;
}
// generate a trimap guess for landscape photos
Image sky_ground_trimap(const Image &im, float frac_sky, float frac_ground)
{
int sky_limit = frac_sky * im.height();
int ground_limit = im.height() - (frac_ground * im.height());
Image trimap(im.width(), im.height(), 1);
for(int y=0; y<im.height(); y++) {
for(int x=0; x<im.width(); x++) {
if(y <= sky_limit)
trimap(x, y) = 0.0f;
else if(y >= ground_limit)
trimap(x, y) = 1.0f;
else
trimap(x, y) = 0.5f;
}
}
return trimap;
}
Image threshold_matte(const Image &matte, float lo, float hi)
{
Image out(matte.width(), matte.height(), 1);
for(int y=0; y<matte.height(); y++) {
for(int x=0; x<matte.width(); x++) {
if(matte(x, y) < lo)
out(x, y) = 0.0f;
else if(matte(x, y) > hi)
out(x, y) = 1.0f;
else
out(x, y) = matte(x, y);
}
}
return out;
}
Image iterative_cfm(const Image &im, const Image &trimap, float threshold, float decimation, int max_size, float lambda, float epsilon, int radius)
{
if(im.width() != trimap.width() || im.height() != trimap.height())
throw InvalidArgument();
int width = im.width();
int height = im.height();
int N = width*height; // number of pixels
// for small enough images, directly solve it
if(N <= max_size) {
return cfm_with_trimap(im, trimap, lambda, epsilon, radius);
}
// otherwise, scale image smaller and solve recursively
Image im_half = scaleLin(im, decimation);
Image trimap_half = scaleLin(trimap, decimation);
Image alpha_half = iterative_cfm(im_half, trimap_half, threshold, decimation, max_size, lambda, epsilon, radius);
// given solution to image at half scale
// scale up, threshold known pixels, and solve ambiguous pixels
Image alpha_half_double = scaleLin(alpha_half, 1.0f/decimation);
Image new_alpha(im.width(), im.height(), 1);
for(int y=0; y<im.height(); y++) {
for(int x=0; x<im.width(); x++) {
// copy over interpolated alpha values
new_alpha(x, y) = alpha_half_double.smartAccessor(x, y, 0, true);
// copy over known alpha values
if(trimap(x, y) < epsilon || trimap(x, y) >= 1-epsilon) {
new_alpha(x, y) = trimap(x, y);
}
}
}
Image new_trimap = threshold_matte(new_alpha, threshold, 1.0f-threshold);
return cfm_with_trimap(im, new_trimap, lambda, epsilon, radius);
}
// compute foreground and background given matte
// this is a very simple approximation instead of the method described in the paper
void compute_fg_bg(const Image &im, const Image &matte, Image &fg_out, Image &bg_out, float fg_threshold, float bg_threshold)
{
if(im.width() != matte.width() || im.height() != matte.height())
throw InvalidArgument();
if(im.width() != fg_out.width() || im.height() != fg_out.height() || im.channels() != fg_out.channels())
throw InvalidArgument();
if(im.width() != bg_out.width() || im.height() != bg_out.height() || im.channels() != bg_out.channels())
throw InvalidArgument();
int width = im.width();
int height = im.height();
int channels = im.channels();
Image matte_th = threshold_matte(matte, bg_threshold, fg_threshold);
// make the background image by alpha-blending
// between image color and black
for(int c=0; c<channels; c++) {
for(int y=0; y<height; y++) {
for(int x=0; x<width; x++) {
bg_out(x, y, c) = (1-matte_th(x, y)) * im(x, y, c); // + matte(x, y) * 0
}
}
}
// make the foreground image by solving the matting equation
// (equation 1 in the paper) for all foreground pixels
// F = (I - (1-alpha)B) / alpha
for(int c=0; c<channels; c++) {
for(int y=0; y<height; y++) {
for(int x=0; x<width; x++) {
// solve for foreground value given matte and background
if(matte(x, y) > 0.0f) {
fg_out(x, y, c) = (im(x, y, c) - (1.0f-matte(x, y)) * bg_out(x, y, c)) / (matte(x, y));
fg_out(x, y, c) = std::min(std::max(fg_out(x, y, c), 0.0f), 1.0f);
} else {
fg_out(x, y, c) = 1.0f;
}
}
}
}
/*
// make the foreground image by alpha-blending
// between image color and white
for(int c=0; c<channels; c++) {
for(int y=0; y<height; y++) {
for(int x=0; x<width; x++) {
fg_out(x, y, c) = matte_th(x, y) * im(x, y, c) + (1.0f-matte_th(x, y)) * 1.0f;
}
}
}
*/
}
// blend foreground and background given a matte
// this is a simple application of equation 1 in the paper
Image blend_fg_bg(const Image &matte, const Image &fg, const Image &bg)
{
if(matte.width() != fg.width() || matte.height() != fg.height())
throw InvalidArgument();
if(matte.width() != bg.width() || matte.height() != bg.height())
throw InvalidArgument();
if(fg.channels() != bg.channels())
throw InvalidArgument();
int width = matte.width();
int height = matte.height();
int channels = fg.channels();
Image out(width, height, channels);
for(int c=0; c<channels; c++) {
for(int y=0; y<height; y++) {
for(int x=0; x<width; x++) {
out(x, y, c) = matte(x, y) * fg(x, y, c) + (1.0f-matte(x, y)) * bg(x, y, c);
}
}
}
return out;
}
/* average color
// we assume that the background has a near-uniform color
// compute average color of background
float color[channels] = { 0 };
float count[channels] = { 0 };
for(int c=0; c<channels; c++) {
for(int y=0; y<height; y++) {
for(int x=0; x<width; x++) {
color[c] += bg_mask(x, y) * im(x, y, c);
count[c] += bg_mask(x, y);
}
}
color[c] /= count[c];
}
for(int c=0; c<channels; c++)
color[c] = 0.0f;
// make the background image by alpha-blending
// between image color and average color of background pixels
for(int c=0; c<channels; c++) {
for(int y=0; y<height; y++) {
for(int x=0; x<width; x++) {
if(bg_mask(x, y) > 0.0f) {
float amount_bg = (bg_threshold - matte(x, y)) / bg_threshold;
float amount_avg = matte(x, y) / bg_threshold;
bg_out(x, y, c) = amount_bg * im(x, y, c) + amount_avg * color[c];
} else {
// fill non-background pixels with average color
bg_out(x, y, c) = color[c];
}
}
}
}
*/