-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmovingmean.m
403 lines (335 loc) · 15.8 KB
/
movingmean.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
function result=movingmean(data,window,dim,option)
%Calculates the centered moving average of an n-dimensional matrix in any direction.
% result=movingmean(data,window,dim,option)
% Inputs:
% 1) data = The matrix to be averaged.
% 2) window = The window size. This works best as an odd number. If an even
% number is entered then it is rounded down to the next odd number.
% 3) dim = The dimension in which you would like do the moving average.
% This is an optional input. To leave blank use [] place holder in
% function call. Defaults to 1.
% 4) option = which solution algorithm to use. The default option works
% best in most situations, but option 2 works better for wide
% matrices (i.e. 1000000 x 1 or 10 x 1000 x 1000) when solved in the
% shorter dimension. Data size where option 2 is more efficient will
% vary from computer to computre.This is an optional input. To leave
% blank use [] place holder in function call. Defaults to 1.
%
% Example:
% Calculate column moving average of 10000 x 10 matrix with a window size
% of 5 in the 1st dimension using algorithm option 1.
% d=rand(10000,10);
% dd=movingmean(d,5,1,1);
% or
% dd=movingmean(d,5,[],1);
% or
% dd=movingmean(d,5,1,[]);
% or
% dd=movingmean(d,5,1);
% or
% dd=movingmean(d,5,[],[]);
% or
% dd=movingmean(d,5);
%
% Moving mean for each element uses data centered on that element and
% incorporates (window-1)/2 elements before and after the element.
%
% Function is broken into two parts. The 1d-2d solution, and the
% n-dimensional solution. The 1d-2d solution is the fastest that I have
% been able to come up with, whereas the n-dimensional solution trades
% speed for versatility.
%
% Function includes some code at the end so that the user can do their
% own speed testing using the TIMEIT function.
%
% Has been heavily tested in 1d-2d case, and lightly tested in 3d case.
% Should work in n-dimensional space, but has not been tested in more
% than 3 dimensions other than to make sure it does not return an error.
% Has not been tested for complex inputs.
%errors for leaving out data or window inputs
if ~nargin
error('no input data')
end
if nargin<2
error('no window size specified')
end
%defaults the dimension and option to 1 if it is not specified
if nargin<3 || isempty(dim)
dim=1;
end
if nargin<4 || isempty(option)
option=1;
end
%rounds even window sizes down to next lowest odd number
if mod(window,2)==0
window=window-1;
end
%Calculates the number of elements in before and after the central element
%to incorporate in the moving mean. Round command is just present to deal
%with the potential problem of division leaving a very small decimal, ie.
%2.000000000001.
halfspace=round((window-1)/2);
%calculates the size of the input data set
n=size(data);
%returns error messages for incorrect inputs
if mod(dim,1)
error('dimension number must be integer')
end
if ((ndims(data)<=2 && (option<1 || option>3)) || (ndims(data)>=3 && (option<1 ||option>2))) ...
|| mod(option,1)
error('invalid algorithm option')
end
if mod(window,1)
error('window size must be integer')
end
if dim>ndims(data)
error('dimension number exceeds number of dimensions in input matrix')
end
if dim<1
error('dimension number must be >=1')
end
if n(dim)<window
error('window is too large')
end
if ndims(data)<=2
%Solution for 1d-2d situation. Uses vector operations to optimize
%solution speed.
%To simplify algorithm the problem is always solved with dim=1.
%If user input is dim=2 then the data is transposed to calculate the
%solution in dim=1
if dim==2
data=data';
end
%The three best solutions I came up to for the 1d-2d problem. I kept
%them in here to preserve the code incase I want to use some of it
%again. I have found that option 1 is consistenetly the fastest.
%option=1;
switch option
case 1
%option 1, works best for most data sets
%Computes the beginning and ending column for each moving
%average compuation. Divide is the number of elements that are
%incorporated in each moving average.
start=[ones(1,halfspace+1) 2:(n(dim)-halfspace)];
stop=[(1+halfspace):n(dim) ones(1,halfspace)*n(dim)];
divide=stop-start+1;
%Calculates the moving average by calculating the sum of elements
%from the start row to the stop row for each central element,
%and then dividing by the number of elements used in that sum
%to get the average for that central element.
%Implemented by calculating the moving sum of the full data
%set. Cumulative sum for each central element is calculated by
%subtracting the cumulative sum for the row before the start row
%from the cumulative sum for the stop row. Row references are
%adusted to take into account the fact that you can now
%reference a row<1. Divides the series of cumulative sums for
%by the number of elements in each sum to get the moving
%average.
CumulativeSum=cumsum(data);
temp_sum=CumulativeSum(stop,:)-CumulativeSum(max(start-1,1),:);
temp_sum((start==1),:)=bsxfun(@plus,temp_sum((start==1),:),data(1,:));
result=bsxfun(@rdivide,temp_sum,divide');
case 2
%option 2, Can be faster than option 1 in very wide matrices
%(i.e. 100 x 1000 or l000 x 10000) when solving in the shorter
%dimension, but in general it is slower than option 1.
%Uses a for loop to calculate the data from the start row to
%the stop row, and then divides by the number of rows
%incorporated.
result=zeros(size(data));
for i=1:n(dim)
start=max(1,i-halfspace);
stop=min(i+halfspace,n(dim));
result(i,:)=sum(data(start:stop,:),1)/(stop-start+1);
end
case 3
%option 3, Creates a sparse matrix to indicate which rows to include
%data from for the moving average for each central element.
%Uses matrix multiplication to calculte the cumulative sum for
%each central element. Then divides by the number of elements
%used in each cumulative sum to get the average for each
%central element.
%Elegant but slow. I think this is because bsxfun has to do
%a lot of matrix replication. Also, it is easy for the use matrix to
%exceed the maximum matrix dimensions for MATLAB.
start=[ones(1,halfspace+1) 2:(n(dim)-halfspace)];
start=start';
stop=[(1+halfspace):n(dim) ones(1,halfspace)*n(dim)];
stop=stop';
divide=stop-start+1;
baseline=1:n(dim);
use=sparse(n(dim),n(dim),n(dim)*max(divide));
use=(bsxfun(@ge,baseline,start) & bsxfun(@le,baseline,stop));
%uses matrix multiplication instead of repmat or bsxfun, slower than
%using bsxfun
%baseline_mat=ones(n(dim),1)*baseline;
%start_mat=start*ones(1,n(dim));
%stop_mat=stop*ones(1,n(dim));
%use=(baseline_mat>=start_mat & baseline_mat<=stop_mat);
%use=sparse(use);
result=bsxfun(@rdivide,use*data,divide);
end
%Transposes the matrix if the problem was solved in dimension=2 so that
%the ouput is correctly oriented. Undoes the initial transposition.
if dim==2
result=result';
end
else
%Solution of the n-dimensional problem. This solution is
%slower than the 1d-2d solution in a 1d-2d problem, but depending on the
%dimensions of the input array can be faster for the same number of
%elements in a 3d+ problem (ie. 1000000 x 1 vs 100 x 100 x 100).
%Either way it is much more versatile. The algorithm can solve for a
%moving average in any dimension of an n-dimension input matrix.
%Two solutions of the problem. In general option 1 is faster than
%option 2, but I think that for some matrix sizes option 2 will be
%faster.
%option=1;
switch option
case 1
%Option 1 is based on option 1 for the 1d-2d solution. In
%general this is the faster solution. For a more extensive
%explanation of the code see option 1 in the 1d-2d section.
%Calculates the start and stop positions, and creates a vector
%of how many elements are in each moving sum.
start=[ones(1,halfspace+1) 2:(n(dim)-halfspace)];
stop=[(1+halfspace):n(dim) ones(1,halfspace)*n(dim)];
divide=stop-start+1;
%Reorients the start, stop, and divide vectors in the direction
%of the specified dimension.
%Creates a vector to use to reshape the start, stop, and divide
%vectors. Puts one in for the each dimension excpet for the
%specified dimension, which is set to the size of the input
%matrix in that direction. ex. [1 1 100 1] for a
%100 x 100 x 100 x 100 input matrix, solving in the 3rd
%dimension.
temp_dims=ones(size(n));
temp_dims(dim)=n(dim);
%inv_temp_dims=n;
%inv_temp_dims(dim)=1;
%Uses the vector created above to reorient the start, stop, and
%divide vectors in the correct direction.
start=reshape(start,temp_dims);
stop=reshape(stop,temp_dims);
divide=reshape(divide,temp_dims);
%Calculates the cumulative sum of the data in the specified
%direction.
CumulativeSum=cumsum(data,dim);
%I have found the most versatile way of writing an algorithm
%that will solve in any dimension of an unknown dimensional
%input matrix is to built some functions with text that is
%based on the input parameters.
%This text builds the function that subtracts the cumulative
%sum at the stop position from the cumulative sum at one
%element before the start position for each central element.
%Compensates for not have an index less than 1.
function_text=['@(CumulativeSum,start,stop) CumulativeSum(' repmat(':,',1,dim-1) 'stop,' repmat(':,',1,ndims(data)-dim)];
function_text=function_text(1,1:end-1);
function_text=[function_text ')-CumulativeSum(' repmat(':,',1,dim-1) 'max(1,start-1),' repmat(':,',1,ndims(data)-dim)];
function_text=function_text(1,1:end-1);
function_text=[function_text ')'];
%Converts the string above into an anonymous function with the
%handle "difference".
difference=str2func(function_text);
%Runs the function built above to determine the cumulative sum
%between start and stop.
temp_sum=difference(CumulativeSum,start,stop);
%Text to make a function that corrects for the fact that the
%cumulative sum for moving sums that start at position 1 have
%the first data position subtracted out. This is becasue you
%can not have a matrix index less than 1. Uses bsxfun to
%multiply start==1 by the first plane of data (in the
%appropriate dimension) and then adding that to temp_sum. Need
%to do it this way vs using something based on reshaping
%data(repmat(start,inv_temp_dims)==1) because the correction
%needs to be the same size as temp_sum. This is needed b/c I
%do not know how to use text to set the variable elements that
%will accept the addition (i.e.
%temp_sum(:,:,1:sum(start==1))=).
correction_text=['@(temp_sum,start,data) temp_sum+bsxfun(@times,start==1,data(' repmat(':,',1,dim-1) '1,' repmat(':,',1,ndims(data)-dim)];
correction_text=correction_text(1,1:end-1);
correction_text=[correction_text '))'];
%Converts the above text into a function.
correction=str2func(correction_text);
%Applies the correction describe previously to the temp_sum
%matrix
temp_sum=correction(temp_sum,start,data);
%Divides the temp_sum matrix by the divide vector to
%calculate the moving average for each central elemant.
result=bsxfun(@rdivide,temp_sum,divide);
case 2
%An older solution to the n-dimensional moving average problem.
%Based on the approach in option 2 in the 1d-2d solution.
%Depending on matrix dimensions this might be faster than
%option 1 in certain situations (ex input matrix size is 10 x
%1000 x 1000 and the average is done in dimension 1).
%Builds a string for the function used in the for loop.
%Basiscally it builds something like sum(data(:,:,start:stop,:),3) but
%is able to work in any dimensions and have start:stop be in
%any of those dimensions.
function_text='@(start,stop,dim) sum(data(';
for i=1:ndims(data)
if i==dim
if i~=1
function_text=[function_text ',start:stop'];
else
function_text=[function_text 'start:stop'];
end
else
if i~=1
function_text=[function_text ',:'];
else
function_text=[function_text ':'];
end
end
end
function_text=[function_text '),dim)'];
%Converts the text above into a function
moving_sum=str2func(function_text);
%For loop determines the start position, stop position, and
%number of elements for the moving average of each central
%element. Calculates the moving sum for each central element
%in that position, and divides by the number of elements to get
%the moving average.
for i=1:n(dim)
start=max(1,i-halfspace);
stop=min(i+halfspace,n(dim));
temp_m=moving_sum(start,stop,dim)/(stop-start+1);
%This was the most versatile way of reassembling all of the
%moving average matrices back into the final output matrix,
%although it is slow b/c the result matrix keeps resizing.
if i==1
result=temp_m;
else
result=cat(dim,result,temp_m);
end
end
end
end
%code for speed testing
%generates a series of random vectors with 10 to 10000000 cells and then
%times the moving average at each size. Time for each input size is
%stored.
%1d-2d timer test
%data_power=[1 2 3 4 5 6 7];
%data_size=10.^data_power;
%window=5;
%dim=1;
%time=zeros(size(data_size));
%for i=1:size(data_size,2)
% data=rand(data_size(i),1);
% f=@() movingmean(data,window,dim);
% time(i)=timeit(f,1);
%end
%3d timer test
%data_power=[1 2 3];
%data_size=10.^data_power;
%window=5;
%dim=1;
%time=zeros(size(data_size));
%for i=1:size(data_size,2)
% data=rand(data_size(i),100,100);
% f=@() movingmean(data,window,dim);
% time(i)=timeit(f,1);
%end
end