movingmean.m

 function result=movingmean(data,window,dim,option)
%Calculates the centered moving average of an n-dimensional matrix in any direction. 
%   result=movingmean(data,window,dim,option)

%   Inputs: 
%   1)  data = The matrix to be averaged. 
%   2)  window = The window size.  This works best as an odd number.  If an even 
%       number is entered then it is rounded down to the next odd number.  
%   3)  dim = The dimension in which you would like do the moving average.
%       This is an optional input.  To leave blank use [] place holder in
%       function call.  Defaults to 1.
%   4)  option = which solution algorithm to use.  The default option works
%       best in most situations, but option 2 works better for wide
%       matrices (i.e. 1000000 x 1 or 10 x 1000 x 1000) when solved in the
%       shorter dimension.  Data size where option 2 is more efficient will 
%       vary from computer to computre.This is an optional input.  To leave  
%       blank use [] place holder in function call.  Defaults to 1.
% 
%   Example:  
%   Calculate column moving average of 10000 x 10 matrix with a window size 
%   of 5 in the 1st dimension using algorithm option 1.
%   d=rand(10000,10);
%   dd=movingmean(d,5,1,1);
%           or
%   dd=movingmean(d,5,[],1);
%           or
%   dd=movingmean(d,5,1,[]);
%           or
%   dd=movingmean(d,5,1);
%           or
%   dd=movingmean(d,5,[],[]);
%           or
%   dd=movingmean(d,5);
%
%   Moving mean for each element uses data centered on that element and
%   incorporates (window-1)/2 elements before and after the element.
%
%   Function is broken into two parts.  The 1d-2d solution, and the
%   n-dimensional solution.  The 1d-2d solution is the fastest that I have 
%   been able to come up with, whereas the n-dimensional solution trades
%   speed for versatility.
%
%   Function includes some code at the end so that the user can do their
%   own speed testing using the TIMEIT function.
%
%   Has been heavily tested in 1d-2d case, and lightly tested in 3d case.
%   Should work in n-dimensional space, but has not been tested in more 
%   than 3 dimensions other than to make sure it does not return an error.
%   Has not been tested for complex inputs.


%errors for leaving out data or window inputs
if ~nargin
    error('no input data')
end

if nargin<2
    error('no window size specified')
end

%defaults the dimension and option to 1 if it is not specified
if nargin<3 || isempty(dim)
    dim=1;
end

if nargin<4 || isempty(option)
    option=1;
end


%rounds even window sizes down to next lowest odd number
if mod(window,2)==0
    window=window-1;
end

%Calculates the number of elements in before and after the central element
%to incorporate in the moving mean.  Round command is just present to deal
%with the potential problem of division leaving a very small decimal, ie.
%2.000000000001.
halfspace=round((window-1)/2);

%calculates the size of the input data set
n=size(data);

%returns error messages for incorrect inputs
if mod(dim,1)
    error('dimension number must be integer')
end

if ((ndims(data)<=2 && (option<1 || option>3)) || (ndims(data)>=3 && (option<1 ||option>2))) ...
       || mod(option,1)
    error('invalid algorithm option')
end

if mod(window,1)
    error('window size must be integer')
end

if dim>ndims(data)
    error('dimension number exceeds number of dimensions in input matrix')
end

if dim<1
    error('dimension number must be >=1')
end

if n(dim)<window
    error('window is too large')
end


if ndims(data)<=2
    %Solution for 1d-2d situation.  Uses vector operations to optimize
    %solution speed.
    
    %To simplify algorithm the problem is always solved with dim=1.
    %If user input is dim=2 then the data is transposed to calculate the
    %solution in dim=1
    if dim==2
        data=data';
    end
  
    
    %The three best solutions I came up to for the 1d-2d problem.  I kept
    %them in here to preserve the code incase I want to use some of it
    %again.  I have found that option 1 is consistenetly the fastest.
    %option=1;
    switch option
        case 1
            %option 1, works best for most data sets
            
            %Computes the beginning and ending column for each moving
            %average compuation.  Divide is the number of elements that are
            %incorporated in each moving average.
            start=[ones(1,halfspace+1) 2:(n(dim)-halfspace)];
            stop=[(1+halfspace):n(dim) ones(1,halfspace)*n(dim)];
            divide=stop-start+1;
     
            %Calculates the moving average by calculating the sum of elements
            %from the start row to the stop row for each central element,
            %and then dividing by the number of elements used in that sum
            %to get the average for that central element.
            %Implemented by calculating the moving sum of the full data
            %set.  Cumulative sum for each central element is calculated by
            %subtracting the cumulative sum for the row before the start row
            %from the cumulative sum for the stop row.  Row references are
            %adusted to take into account the fact that you can now
            %reference a row<1.  Divides the series of cumulative sums for
            %by the number of elements in each sum to get the moving
            %average.
            CumulativeSum=cumsum(data);
            temp_sum=CumulativeSum(stop,:)-CumulativeSum(max(start-1,1),:);
            temp_sum((start==1),:)=bsxfun(@plus,temp_sum((start==1),:),data(1,:));
            result=bsxfun(@rdivide,temp_sum,divide');
            
        case 2
            %option 2, Can be faster than option 1 in very wide matrices
            %(i.e. 100 x 1000 or l000 x 10000) when solving in the shorter
            %dimension, but in general it is slower than option 1.
            %Uses a for loop to calculate the data from the start row to
            %the stop row, and then divides by the number of rows
            %incorporated.
            
            result=zeros(size(data));
            for i=1:n(dim)
                start=max(1,i-halfspace);
                stop=min(i+halfspace,n(dim));
                result(i,:)=sum(data(start:stop,:),1)/(stop-start+1);
            end
            
        case 3
            %option 3, Creates a sparse matrix to indicate which rows to include
            %data from for the moving average for each central element.
            %Uses matrix multiplication to calculte the cumulative sum for 
            %each central element.  Then divides by the number of elements
            %used in each cumulative sum to get the average for each
            %central element.
            %Elegant but slow.  I think this is because bsxfun has to do
            %a lot of matrix replication.  Also, it is easy for the use matrix to
            %exceed the maximum matrix dimensions for MATLAB.
     
            start=[ones(1,halfspace+1) 2:(n(dim)-halfspace)];
            start=start';
     
            stop=[(1+halfspace):n(dim) ones(1,halfspace)*n(dim)];
            stop=stop';
     
            divide=stop-start+1;
    
            baseline=1:n(dim);
     
            use=sparse(n(dim),n(dim),n(dim)*max(divide));
            use=(bsxfun(@ge,baseline,start) & bsxfun(@le,baseline,stop));
     
            %uses matrix multiplication instead of repmat or bsxfun, slower than
            %using bsxfun
            %baseline_mat=ones(n(dim),1)*baseline;
            %start_mat=start*ones(1,n(dim));
            %stop_mat=stop*ones(1,n(dim));
        
            %use=(baseline_mat>=start_mat & baseline_mat<=stop_mat);
            %use=sparse(use);
    
            result=bsxfun(@rdivide,use*data,divide);
    end

    %Transposes the matrix if the problem was solved in dimension=2 so that
    %the ouput is correctly oriented. Undoes the initial transposition.
    if dim==2
        result=result';
    end
else
    %Solution of the n-dimensional problem.  This solution is 
    %slower than the 1d-2d solution in a 1d-2d problem, but depending on the
    %dimensions of the input array can be faster for the same number of
    %elements in a 3d+ problem (ie. 1000000 x 1 vs 100 x 100 x 100).  
    %Either way it is much more versatile.  The algorithm can solve for a 
    %moving average in any dimension of an n-dimension input matrix.
    
    %Two solutions of the problem.  In general option 1 is faster than
    %option 2, but I think that for some matrix sizes option 2 will be
    %faster.  
    %option=1;
    switch option
 
        case 1
            %Option 1 is based on option 1 for the 1d-2d solution.  In
            %general this is the faster solution.  For a more extensive
            %explanation of the code see option 1 in the 1d-2d section.
            
            %Calculates the start and stop positions, and creates a vector
            %of how many elements are in each moving sum.
            start=[ones(1,halfspace+1) 2:(n(dim)-halfspace)];
            stop=[(1+halfspace):n(dim) ones(1,halfspace)*n(dim)];
            divide=stop-start+1;
            
            %Reorients the start, stop, and divide vectors in the direction
            %of the specified dimension.
            %Creates a vector to use to reshape the start, stop, and divide
            %vectors.  Puts one in for the each dimension excpet for the
            %specified dimension, which is set to the size of the input
            %matrix in that direction.  ex. [1 1 100 1] for a 
            %100 x 100 x 100 x 100 input matrix, solving in the 3rd
            %dimension.
            temp_dims=ones(size(n));
            temp_dims(dim)=n(dim);
            
            %inv_temp_dims=n;
            %inv_temp_dims(dim)=1;
            
            %Uses the vector created above to reorient the start, stop, and
            %divide vectors in the correct direction.
            start=reshape(start,temp_dims);
            stop=reshape(stop,temp_dims);
            divide=reshape(divide,temp_dims);
            
            %Calculates the cumulative sum of the data in the specified
            %direction.
            CumulativeSum=cumsum(data,dim);
            
            %I have found the most versatile way of writing an algorithm
            %that will solve in any dimension of an unknown dimensional
            %input matrix is to built some functions with text that is
            %based on the input parameters.
            
            %This text builds the function that subtracts the cumulative
            %sum at the stop position from the cumulative sum at one
            %element before the start position for each central element.
            %Compensates for not have an index less than 1.
            function_text=['@(CumulativeSum,start,stop) CumulativeSum(' repmat(':,',1,dim-1) 'stop,' repmat(':,',1,ndims(data)-dim)];
            function_text=function_text(1,1:end-1);
            function_text=[function_text ')-CumulativeSum(' repmat(':,',1,dim-1) 'max(1,start-1),' repmat(':,',1,ndims(data)-dim)];
            function_text=function_text(1,1:end-1);
            function_text=[function_text ')'];
            
            %Converts the string above into an anonymous function with the
            %handle "difference".
            difference=str2func(function_text);
            
            %Runs the function built above to determine the cumulative sum
            %between start and stop.
            temp_sum=difference(CumulativeSum,start,stop);
            
            %Text to make a function that corrects for the fact that the
            %cumulative sum for moving sums that start at position 1 have
            %the first data position subtracted out.  This is becasue you
            %can not have a matrix index less than 1.  Uses bsxfun to
            %multiply start==1 by the first plane of data (in the
            %appropriate dimension) and then adding that to temp_sum.  Need
            %to do it this way vs using something based on reshaping
            %data(repmat(start,inv_temp_dims)==1) because the correction
            %needs to be the same size as temp_sum.  This is needed b/c I
            %do not know how to use text to set the variable elements that
            %will accept the addition (i.e.
            %temp_sum(:,:,1:sum(start==1))=).
            correction_text=['@(temp_sum,start,data) temp_sum+bsxfun(@times,start==1,data(' repmat(':,',1,dim-1) '1,' repmat(':,',1,ndims(data)-dim)];
            correction_text=correction_text(1,1:end-1);
            correction_text=[correction_text '))'];
            
            %Converts the above text into a function.
            correction=str2func(correction_text);
            
            %Applies the correction describe previously to the temp_sum
            %matrix
            temp_sum=correction(temp_sum,start,data);
            
            %Divides the temp_sum matrix by the divide vector to
            %calculate the moving average for each central elemant.
            result=bsxfun(@rdivide,temp_sum,divide);
        
        case 2
            %An older solution to the n-dimensional moving average problem.
            
            %Based on the approach in option 2 in the 1d-2d solution.
            %Depending on matrix dimensions this might be faster than
            %option 1 in certain situations (ex input matrix size is 10 x
            %1000 x 1000 and the average is done in dimension 1).
            
            %Builds a string for the function used in the for loop.
            %Basiscally it builds something like sum(data(:,:,start:stop,:),3) but
            %is able to work in any dimensions and have start:stop be in
            %any of those dimensions.
            function_text='@(start,stop,dim) sum(data(';
            for i=1:ndims(data)
                if i==dim 
                    if i~=1
                        function_text=[function_text ',start:stop'];
                    else
                        function_text=[function_text 'start:stop'];
                    end
                else
                    if i~=1
                        function_text=[function_text ',:'];
                    else
                        function_text=[function_text ':'];
                    end
                end 
            end       
       
            function_text=[function_text '),dim)'];
            
            %Converts the text above into a function
            moving_sum=str2func(function_text);
                  
            %For loop determines the start position, stop position, and
            %number of elements for the moving average of each central
            %element.  Calculates the moving sum for each central element
            %in that position, and divides by the number of elements to get
            %the moving average.  
            for i=1:n(dim)
                start=max(1,i-halfspace);
                stop=min(i+halfspace,n(dim));
                
                temp_m=moving_sum(start,stop,dim)/(stop-start+1);
        
                %This was the most versatile way of reassembling all of the
                %moving average matrices back into the final output matrix,
                %although it is slow b/c the result matrix keeps resizing.
                if i==1
                    result=temp_m;
                else
                    result=cat(dim,result,temp_m);
                end
            end
        
         
    end
end


%code for speed testing
%generates a series of random vectors with 10 to 10000000 cells and then
%times the moving average at each size.  Time for each input size is
%stored.

%1d-2d timer test
%data_power=[1 2 3 4 5 6 7];
%data_size=10.^data_power;
%window=5;
%dim=1;
%time=zeros(size(data_size));

%for i=1:size(data_size,2)
%   data=rand(data_size(i),1);
%   f=@() movingmean(data,window,dim);
%   time(i)=timeit(f,1);
%end


%3d timer test
%data_power=[1 2 3];
%data_size=10.^data_power;
%window=5;
%dim=1;
%time=zeros(size(data_size));

%for i=1:size(data_size,2)
%   data=rand(data_size(i),100,100);
%   f=@() movingmean(data,window,dim);
%   time(i)=timeit(f,1);
%end
 end