Provide default options for chunking of datasets (issue #635) #636

Draft · wants to merge 30 commits into main from 635-customizable-chunking

Commits (30)
5f48a4a
Add initial chunk parameters and function to read it from file
ehennestad Nov 23, 2024
4822417
First draft of applying chunk configurations
ehennestad Nov 23, 2024
cfdefd6
Minor fixes
ehennestad Nov 23, 2024
e164ce0
Create listDatasetsOfNeurodataType.m
ehennestad Jan 21, 2025
e5f9bc7
Add new template for dataset configuration json
ehennestad Jan 21, 2025
c7402d8
Update applyChunkConfiguration and dependent functions to work with n…
ehennestad Jan 21, 2025
a287409
Merge branch 'master' into 635-customizable-chunking
ehennestad Jan 21, 2025
b32c3c4
Remove unused condition in applyChunkConfiguration
ehennestad Jan 21, 2025
37e68e1
Update getParentType.m
ehennestad Jan 21, 2025
c20cb49
Merge branch '635-customizable-chunking' of https://github.com/Neurod…
ehennestad Jan 22, 2025
3d5d2ac
Add different dataset configuration profiles
ehennestad Jan 22, 2025
9e623a2
Consistently name functions and code using datasetConfiguration inste…
ehennestad Jan 22, 2025
665bf5c
Test-related fixes
ehennestad Jan 22, 2025
837d808
Merge branch 'master' into 635-customizable-chunking
ehennestad Jan 22, 2025
32771ea
simplify readDatasetConfiguration
ehennestad Jan 22, 2025
9512763
Merge branch '635-customizable-chunking' of https://github.com/Neurod…
ehennestad Jan 22, 2025
fcef5de
Merge branch 'master' into 635-customizable-chunking
ehennestad Jan 31, 2025
470a8e8
Merge branch '635-customizable-chunking' of https://github.com/Neurod…
ehennestad Feb 11, 2025
b47f153
Merge branch 'main' into 635-customizable-chunking
ehennestad Feb 11, 2025
52c5c2a
Merge branch '635-customizable-chunking' of https://github.com/Neurod…
ehennestad Feb 11, 2025
ab77624
Merge branch 'main' into 635-customizable-chunking
ehennestad Feb 22, 2025
8bde775
Create applyCustomMatNWBPropertyNames.m
ehennestad Feb 22, 2025
12f0453
Update configuration/archive_dataset_configuration.json
ehennestad Feb 22, 2025
b58cf87
Merge branch 'main' into 635-customizable-chunking
ehennestad Feb 22, 2025
7bfc2c8
Add docstring for function applyCustomMatNWBPropertyNames.m
ehennestad Feb 22, 2025
a986a95
Update listDatasetsOfNeurodataType.m
ehennestad Feb 22, 2025
9e7f242
Fix compute chunk size
ehennestad Feb 22, 2025
58698fe
Update readDatasetConfiguration.m
ehennestad Feb 22, 2025
560b501
Update resolveDatasetConfigForDataType.m
ehennestad Feb 22, 2025
fdace4c
Merge branch '635-customizable-chunking' of https://github.com/Neurod…
ehennestad Feb 22, 2025
130 changes: 130 additions & 0 deletions +io/+config/+internal/applyCustomMatNWBPropertyNames.m
@@ -0,0 +1,130 @@
function datasetConfiguration = applyCustomMatNWBPropertyNames(datasetConfiguration)
% applyCustomMatNWBPropertyNames - Processes a dataset configuration structure to apply custom MatNWB property names.
%
% datasetConfiguration = applyCustomMatNWBPropertyNames(datasetConfiguration)
%
% This function iterates through each field of the input structure and checks
% if the field corresponds to a known NWB type (using a mapping from short
% names to fully qualified class names). For each recognized field:
%
% - It retrieves the full class name and determines its superclasses.
% - If the class is a subclass of "types.untyped.MetaClass":
% * If it is also a "types.untyped.GroupClass", the function recursively
% processes the subgroup configuration.
% * If it is a "types.untyped.DatasetClass", it wraps the existing
% configuration in a structure with a "data" property.
% - If the field is not associated with a recognized NWB type, it remains
% unchanged.
%
% Input:
% datasetConfiguration - A 1x1 struct containing dataset configuration
% data.
%
% Output:
% datasetConfiguration - The updated configuration structure with custom
% property names.

arguments
datasetConfiguration (1,1) struct
end

fields = fieldnames(datasetConfiguration);

classNameMap = getNwbTypesClassnameMap();

for i = 1:numel(fields)

thisField = fields{i};
if ~isKey(classNameMap, thisField)
continue % Not a neurodata / nwb type
end

fullClassName = classNameMap(thisField);
superclassNames = superclasses(fullClassName);

if any(strcmp(superclassNames, "types.untyped.MetaClass"))
thisSubConfig = datasetConfiguration.(thisField);
if any(strcmp(superclassNames, "types.untyped.GroupClass"))
% Recursively process subgroups.
datasetConfiguration.(thisField) = ...
io.config.internal.applyCustomMatNWBPropertyNames(thisSubConfig);
elseif any(strcmp(superclassNames, "types.untyped.DatasetClass"))
% Wrap Dataset type configurations in a struct with a "data" field.
datasetConfiguration.(thisField) = struct('data', thisSubConfig);
else
error('NWB:UnexpectedError', ...
'Class "%s" is a subclass of "types.untyped.MetaClass", but is neither a GroupClass nor a DatasetClass.', fullClassName)
end
else
% Recognized class that is not a "types.untyped.MetaClass" subclass;
% leave the field unmodified.
end
end
end

function ancestorPath = getAncestorPath(initialPath, numSteps)
% getAncestorPath - Get an ancestor directory path.
%
% ancestorPath = GETANCESTORPATH(initialPath, numSteps)
%
% Input:
% initialPath - A string representing the starting file or directory path.
% numSteps - A positive integer indicating the number of directory
% levels to move up.
%
% Output:
% ancestorPath - A string representing the ancestor directory path.

arguments
initialPath (1,1) string
numSteps (1,1) double
end
splitPath = split(initialPath, filesep);

ancestorPath = fullfile(splitPath{1:end-numSteps}); % char output

% Ensure the path starts with a file separator on Unix systems.
if isunix && ~startsWith(ancestorPath, filesep)
ancestorPath = [filesep ancestorPath];
end
end

function map = getNwbTypesClassnameMap()
% getNwbTypesClassnameMap - Constructs a mapping between NWB type short names
% and their fully qualified class names.
%
% map = GETNWBTYPESCLASSNAMEMAP()
%
% The function locates the directory containing NWB type definitions
% (using the location of 'types.core.NWBFile' as a reference) and searches
% recursively for all MATLAB class definition files (*.m). It then filters
% out files in the '+types/+untyped' and '+types/+util' folders.
%
% Output:
% map - A mapping object (either a dictionary or containers.Map) where:
% * Keys : Short class names (derived from file names without the .m extension).
% * Values : Fully qualified class names in the format "types.namespace.ClassName".

typesClassDirectory = getAncestorPath( which('types.core.NWBFile'), 2 );

% Find all MATLAB class files recursively within the directory.
L = dir(fullfile(typesClassDirectory, '**', '*.m'));

% Exclude files from the '+types/+untyped' and '+types/+util' directories.
ignore = contains({L.folder}, fullfile('+types', '+untyped')) | ...
contains({L.folder}, fullfile('+types', '+util'));
L(ignore) = [];

% Extract namespace and class names from the file paths.
[~, namespaceNames] = fileparts({L.folder});
namespaceNames = string( strrep(namespaceNames, '+', '') );
classNames = string( strrep( {L.name}, '.m', '') );

% Compose fully qualified class names using the namespace and class name.
fullClassNames = matnwb.common.composeFullClassName(namespaceNames, classNames);

% Create a mapping from the short class names to the fully qualified class names.
try
map = dictionary(classNames, fullClassNames);
catch % Fallback for older versions of MATLAB.
map = containers.Map(classNames, fullClassNames);
end
end
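A minimal usage sketch of the function above. The configuration fields are invented for illustration; 'VectorData' is assumed to resolve to a types.untyped.DatasetClass subclass in the generated type files:

% Invented configuration with one group type and one dataset type.
cfg = struct();
cfg.TimeSeries = struct('data', struct('compression', 'gzip')); % GroupClass: recursed into
cfg.VectorData = struct('compression', 'gzip');                 % DatasetClass: gets wrapped

cfg = io.config.internal.applyCustomMatNWBPropertyNames(cfg);
% cfg.VectorData is now struct('data', struct('compression', 'gzip')),
% matching how dataset-typed classes expose their values via a "data" property.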
120 changes: 120 additions & 0 deletions +io/+config/+internal/computeChunkSizeFromConfig.m
@@ -0,0 +1,120 @@
function chunkSize = computeChunkSizeFromConfig(A, datasetConfig)
% computeChunkSizeFromConfig - Compute the chunk size for a dataset using the provided configuration.
% This function determines the chunk size for a dataset based on the chunk
% dimensions provided in the datasetConfig structure. It adjusts dimensions
% according to rules: 'max' uses the dataset size, fixed numbers use their
% value, and 'flex' calculates the dimension size to approximate the target
% chunk size in bytes.
%
% Inputs:
% A - A numeric dataset whose chunk size is to be computed.
% datasetConfig (1,1) struct - Struct defining chunk dimensions and chunk target size.
%
% Output:
% chunkSize - A vector specifying the chunk size for each dimension.

arguments
A {mustBeNumeric}
datasetConfig (1,1) struct
end

assert(isfield(datasetConfig, 'chunk_dimensions'), ...
'Expected datasetConfig to have field "chunk_dimensions"')
assert(isfield(datasetConfig, 'target_chunk_size'), ...
'Expected datasetConfig to have field "target_chunk_size"')

% Get dataset size
dataSize = size(A);
dataSize = fliplr(dataSize); % MatNWB writes dimensions in flipped (HDF5) order
numDimensions = numel(dataSize);

% Extract chunk dimensions configuration
chunkDimensions = datasetConfig.chunk_dimensions;
if ~iscell(chunkDimensions)
if isscalar(chunkDimensions)
chunkDimensions = {chunkDimensions};
else
error('NWB:UnexpectedError', 'Unexpected chunk_dimensions format.');
end
end

% Find the chunk dimensions specification matching the number of
% dimensions of the input array A
numChunkDimensions = cellfun(@numel, chunkDimensions);
if any(ismember(numChunkDimensions, numDimensions))
chunkDimensions = chunkDimensions{numChunkDimensions == numDimensions};
elseif all(numDimensions > numChunkDimensions)
chunkDimensions = chunkDimensions{end};
else
error('NWB:UnexpectedError', 'Unexpected chunk dimension size.')
end

if ~iscell(chunkDimensions)
chunkDimensions = num2cell(chunkDimensions);
end

defaultChunkSize = datasetConfig.target_chunk_size.value; % in bytes
dataByteSize = io.config.internal.getDataByteSize(A);
elementSize = dataByteSize / numel(A); % bytes per element

% Determine the target number of elements per chunk.
targetNumElements = defaultChunkSize / elementSize;

% Initialize chunk size array
chunkSize = zeros(1, numDimensions);
flexDims = false(1, numDimensions);

assert(iscell(chunkDimensions), "Expected chunk dimensions specification to be a cell array")

isFlex = @(x) ischar(x) && strcmp(x, 'flex');
isMax = @(x) ischar(x) && strcmp(x, 'max');

% Calculate chunk size for each dimension
for dim = 1:numDimensions
if dim > numel(chunkDimensions)
% Use full size for dimensions beyond the specification
chunkSize(dim) = dataSize(dim);
else
dimSpec = chunkDimensions{dim};
if isFlex(dimSpec)
flexDims(dim) = true;
% Leave chunkSize(dim) to be determined.
elseif isnumeric(dimSpec)
chunkSize(dim) = dimSpec;
elseif isMax(dimSpec)
chunkSize(dim) = dataSize(dim);
else
error('Invalid chunk specification for dimension %d.', dim);
end
end
end

% Compute the product of fixed dimensions (number of elements per chunk).
if any(~flexDims)
fixedProduct = prod(chunkSize(~flexDims));
else
fixedProduct = 1;
end

% For flex dimensions, compute the remaining number of elements
% and allocate them equally in the exponent space.
nFlex = sum(flexDims);
if nFlex > 0
remainingElements = targetNumElements / fixedProduct;
% Ensure remainingElements is at least 1.
remainingElements = max(remainingElements, 1);
% Compute an equal allocation factor for each flex dimension.
elementsPerFlexDimension = nthroot(remainingElements, nFlex);
% Assign computed chunk size for each flex dimension.
for dim = find(flexDims)
proposedSize = max(1, round(elementsPerFlexDimension));
% Do not exceed the full dimension size.
chunkSize(dim) = min(proposedSize, dataSize(dim));
end
end

% Clamp chunk size to the dataset dimensions while both are still in
% flipped order, then flip back to MATLAB dimension order.
chunkSize = min(chunkSize, dataSize);
chunkSize = fliplr(chunkSize);
end
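A worked sketch of the sizing rules above. The configuration values are illustrative, not taken from the shipped JSON profiles:

A = zeros(100000, 16);                            % double: 8 bytes/element, ~12.8 MB

cfg = struct();
cfg.chunk_dimensions = {{'max', 'flex'}};         % one spec for 2-D data (flipped order)
cfg.target_chunk_size = struct('value', 1024^2);  % target ~1 MiB per chunk

chunkSize = io.config.internal.computeChunkSizeFromConfig(A, cfg);
% Internally size(A) is flipped to [16 100000]; 'max' pins the first flipped
% dimension to 16, and 'flex' solves (1048576/8)/16 = 8192 elements, so the
% result (flipped back to MATLAB order) is [8192 16] -> exactly 1 MiB chunks.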
42 changes: 42 additions & 0 deletions +io/+config/+internal/configureDataPipeFromData.m
@@ -0,0 +1,42 @@
function dataPipe = configureDataPipeFromData(numericData, datasetConfig)
% configureDataPipeFromData - Configure a DataPipe from numeric data and dataset configuration

import io.config.internal.computeChunkSizeFromConfig
import types.untyped.datapipe.properties.DynamicFilter

chunkSize = computeChunkSizeFromConfig(numericData, datasetConfig);
maxSize = size(numericData);

dataPipeArgs = {...
"data", numericData, ...
"maxSize", maxSize, ...
"chunkSize", chunkSize };

hasShuffle = ~isempty(datasetConfig.compression.prefilters)...
&& contains(datasetConfig.compression.prefilters, 'shuffle');

if strcmpi(datasetConfig.compression.algorithm, "Deflate")
% Use standard compression filters
dataPipeArgs = [ dataPipeArgs, ...
{'hasShuffle', hasShuffle, ...
'compressionLevel', datasetConfig.compression.level} ...
];
else
% Create property list of custom filters for dataset creation
compressionFilter = DynamicFilter( ...
datasetConfig.compression.algorithm, ...
datasetConfig.compression.level );

if hasShuffle
shuffleFilter = types.untyped.datapipe.properties.Shuffle();
filters = [shuffleFilter compressionFilter];
else
filters = compressionFilter;
end
dataPipeArgs = [ dataPipeArgs, ...
{'filters', filters} ];
end

% Create the datapipe.
dataPipe = types.untyped.DataPipe( dataPipeArgs{:} );
end
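A sketch of the configuration shape this function consumes. Field values are assumptions for illustration; per the branches above, only the 'Deflate' algorithm maps onto DataPipe's built-in compressionLevel/hasShuffle arguments, while other algorithms go through DynamicFilter:

data = rand(10000, 8);

cfg = struct();
cfg.chunk_dimensions = {{'flex', 'max'}};
cfg.target_chunk_size = struct('value', 1024^2);
cfg.compression = struct( ...
    'algorithm', 'Deflate', ...   % built-in gzip path
    'level', 3, ...
    'prefilters', {{'shuffle'}}); % enables the shuffle prefilter

dataPipe = io.config.internal.configureDataPipeFromData(data, cfg);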
7 changes: 7 additions & 0 deletions +io/+config/+internal/getDataByteSize.m
@@ -0,0 +1,7 @@
function byteSize = getDataByteSize(data)
% getDataByteSize - Get bytesize of a numeric array
dataType = class(data);
bytesPerDataPoint = io.getMatTypeSize(dataType);

byteSize = numel(data) .* bytesPerDataPoint;
end
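For example, assuming io.getMatTypeSize reports 8 bytes for double and 2 for int16:

io.config.internal.getDataByteSize(zeros(100))           % 100*100*8 = 80000 bytes
io.config.internal.getDataByteSize(zeros(100, 'int16'))  % 100*100*2 = 20000 bytes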
3 changes: 3 additions & 0 deletions +io/+config/+internal/reconfigureDataPipe.m
@@ -0,0 +1,3 @@
function dataPipe = reconfigureDataPipe(dataPipe, datasetConfig) %#ok<INUSD>
% reconfigureDataPipe - Reconfigure an existing DataPipe from a dataset configuration
%
% TODO: Not yet implemented; currently returns the input DataPipe unchanged.
end
82 changes: 82 additions & 0 deletions +io/+config/+internal/resolveDatasetConfigForDataType.m
@@ -0,0 +1,82 @@
function resolvedOptions = resolveDatasetConfigForDataType(datasetConfig, nwbObject, datasetName)
% resolveDatasetConfigForDataType - Resolve the dataset configuration for individual neurodata types
% This function resolves the dataset configuration options for a given NWB
% object by traversing the object hierarchy and combining options from the
% most specific type to the base type, as defined in the datasetConfig
% structure.
%
% Input:
% datasetConfig (struct): A struct representation of the dataset configuration JSON.
% nwbObject (types.untyped.MetaClass): An NWB object whose dataset configuration will be resolved.
% datasetName (string): Name of the dataset property of the NWB object to resolve configuration for.
%
% Output:
% resolvedOptions (struct): A struct containing the resolved dataset configuration options.

arguments
datasetConfig (1,1) struct
nwbObject (1,1) types.untyped.MetaClass
datasetName (1,1) string
end

% Initialize resolvedOptions with default options.
resolvedOptions = datasetConfig.Default;

% Get the NWB object type hierarchy (from most specific to base type)
typeHierarchy = getTypeHierarchy(nwbObject);

% Traverse the type hierarchy from base type to most specific type, so
% that options for more specific types override inherited ones
for i = numel(typeHierarchy):-1:1
typeName = typeHierarchy{i};

% Check if the neurodata type has a datasetConfig
if isfield(datasetConfig, typeName)
typeOptions = datasetConfig.(typeName);

% Is datasetName part of typeOptions?
if isfield(typeOptions, datasetName)
% Merge options into resolvedOptions
datasetOptions = typeOptions.(datasetName);
resolvedOptions = mergeStructs(resolvedOptions, datasetOptions);
end
end
end
end

function typeHierarchy = getTypeHierarchy(nwbObject)
% getTypeHierarchy - Retrieve the type hierarchy of an NWB object.
% This function returns a cell array of type names, starting from the specific
% type of the given NWB object up to its base type.

typeHierarchy = {}; % Initialize an empty cell array
currentType = class(nwbObject); % Start with the specific type

while ~isempty(currentType)
shortClassName = regexp(currentType, '[^.]+$', 'match', 'once');
typeHierarchy{end+1} = shortClassName; %#ok<AGROW>

% Use MetaClass information to get the parent type
metaClass = meta.class.fromName(currentType);
if isempty(metaClass.SuperclassList)
break; % Reached the base type
end
currentType = metaClass.SuperclassList(1).Name;
end
end

function merged = mergeStructs(baseStruct, newStruct)
% mergeStructs - Merge two structs, with fields in newStruct overriding those in baseStruct.

merged = baseStruct; % Start with the base struct

fields = fieldnames(newStruct);
for i = 1:numel(fields)
field = fields{i};
if isstruct(newStruct.(field)) && isfield(baseStruct, field) && isstruct(baseStruct.(field))
% Recursively merge if both fields are structs
merged.(field) = mergeStructs(baseStruct.(field), newStruct.(field));
else
% Otherwise, override the field
merged.(field) = newStruct.(field);
end
end
end
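An end-to-end sketch of the resolution order. The 'Default' and per-type entries follow the structure this function reads; the concrete values are invented:

% Invented configuration: ElectricalSeries overrides the default level for "data".
datasetConfig = struct();
datasetConfig.Default = struct( ...
    'compression', struct('algorithm', 'Deflate', 'level', 3));
datasetConfig.ElectricalSeries = struct( ...
    'data', struct('compression', struct('level', 9)));

es = types.core.ElectricalSeries();
resolved = io.config.internal.resolveDatasetConfigForDataType( ...
    datasetConfig, es, "data");
% resolved.compression.algorithm == 'Deflate' (inherited from Default)
% resolved.compression.level == 9 (ElectricalSeries-specific override)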