diff --git a/+io/+config/+internal/applyCustomMatNWBPropertyNames.m b/+io/+config/+internal/applyCustomMatNWBPropertyNames.m
new file mode 100644
index 00000000..4fa82c3b
--- /dev/null
+++ b/+io/+config/+internal/applyCustomMatNWBPropertyNames.m
@@ -0,0 +1,130 @@
+function datasetConfiguration = applyCustomMatNWBPropertyNames(datasetConfiguration)
+% applyCustomMatNWBPropertyNames - Processes a dataset configuration structure to apply custom MatNWB property names.
+%
+%   datasetConfiguration = applyCustomMatNWBPropertyNames(datasetConfiguration)
+%
+%   This function iterates through each field of the input structure and checks
+%   if the field corresponds to a known NWB type (using a mapping from short
+%   names to fully qualified class names). For each recognized field:
+%
+%   - It retrieves the full class name and determines its superclasses.
+%   - If the class is a subclass of "types.untyped.MetaClass":
+%       * If it is also a "types.untyped.GroupClass", the function recursively
+%         processes the subgroup configuration.
+%       * If it is a "types.untyped.DatasetClass", it wraps the existing
+%         configuration in a structure with a "data" property.
+%   - If the field is not associated with a recognized NWB type, it remains
+%     unchanged.
+%
+% Input:
+%   datasetConfiguration - A 1x1 struct containing dataset configuration data.
+%
+% Output:
+%   datasetConfiguration - The updated configuration structure with custom
+%                          property names.
+
+    arguments
+        datasetConfiguration (1,1) struct
+    end
+
+    fields = fieldnames(datasetConfiguration);
+
+    classNameMap = getNwbTypesClassnameMap();
+
+    for i = 1:numel(fields)
+
+        thisField = fields{i};
+        if ~isKey(classNameMap, thisField)
+            continue % Not a neurodata / nwb type
+        end
+
+        fullClassName = classNameMap(thisField);
+        superclassNames = superclasses(fullClassName);
+
+        if any(strcmp(superclassNames, "types.untyped.MetaClass"))
+            thisSubConfig = datasetConfiguration.(thisField);
+            if any(strcmp(superclassNames, "types.untyped.GroupClass"))
+                % Recursively process subgroups.
+                datasetConfiguration.(thisField) = ...
+                    io.config.internal.applyCustomMatNWBPropertyNames(thisSubConfig);
+            elseif any(strcmp(superclassNames, "types.untyped.DatasetClass"))
+                % Wrap Dataset type configurations in a struct with a "data" field.
+                datasetConfiguration.(thisField) = struct('data', thisSubConfig);
+            else
+                error('NWB:UnexpectedError', ...
+                    'Expected class "%s" to be a GroupClass or a DatasetClass.', ...
+                    fullClassName)
+            end
+        else
+            % Not a subclass of types.untyped.MetaClass; leave the field unmodified.
+        end
+    end
+end
+
+function ancestorPath = getAncestorPath(initialPath, numSteps)
+% getAncestorPath - Get an ancestor directory path.
+%
+%   ancestorPath = GETANCESTORPATH(initialPath, numSteps)
+%
+% Input:
+%   initialPath - A string representing the starting file or directory path.
+%   numSteps    - A positive integer indicating the number of directory
+%                 levels to move up.
+%
+% Output:
+%   ancestorPath - A string representing the ancestor directory path.
+
+    arguments
+        initialPath (1,1) string
+        numSteps (1,1) double {mustBeInteger, mustBePositive}
+    end
+    splitPath = split(initialPath, filesep);
+
+    ancestorPath = fullfile(splitPath{1:end-numSteps}); % char output
+
+    % Ensure the path starts with a file separator on Unix systems.
+    if isunix && ~startsWith(ancestorPath, filesep)
+        ancestorPath = [filesep ancestorPath];
+    end
+end
+
+function map = getNwbTypesClassnameMap()
+% getNwbTypesClassnameMap - Constructs a mapping between NWB type short names
+% and their fully qualified class names.
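+% For example, the key "TimeSeries" maps to the value "types.core.TimeSeries".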
+%
+% map = GETNWBTYPESCLASSNAMEMAP()
+%
+% The function locates the directory containing NWB type definitions
+% (using the location of 'types.core.NWBFile' as a reference) and searches
+% recursively for all MATLAB class definition files (*.m). It then filters
+% out files in the '+types/+untyped' and '+types/+util' folders.
+%
+% Output:
+%   map - A mapping object (either a dictionary or containers.Map) where:
+%     * Keys   : Short class names (derived from file names without the .m extension).
+%     * Values : Fully qualified class names in the format "types.namespace.ClassName".
+
+    typesClassDirectory = getAncestorPath( which('types.core.NWBFile'), 2 );
+
+    % Find all MATLAB class files recursively within the directory.
+    L = dir(fullfile(typesClassDirectory, '**', '*.m'));
+
+    % Exclude files from the '+types/+untyped' and '+types/+util' directories.
+    ignore = contains({L.folder}, fullfile('+types', '+untyped')) | ...
+        contains({L.folder}, fullfile('+types', '+util'));
+    L(ignore) = [];
+
+    % Extract namespace and class names from the file paths.
+    [~, namespaceNames] = fileparts({L.folder});
+    namespaceNames = string( strrep(namespaceNames, '+', '') );
+    classNames = string( strrep( {L.name}, '.m', '') );
+
+    % Compose fully qualified class names using the namespace and class name.
+    fullClassNames = matnwb.common.composeFullClassName(namespaceNames, classNames);
+
+    % Create a mapping from the short class names to the fully qualified class names.
+    try
+        map = dictionary(classNames, fullClassNames);
+    catch % Fallback for older versions of MATLAB.
+        map = containers.Map(classNames, fullClassNames);
+    end
+end
diff --git a/+io/+config/+internal/computeChunkSizeFromConfig.m b/+io/+config/+internal/computeChunkSizeFromConfig.m
new file mode 100644
index 00000000..a9eec0e9
--- /dev/null
+++ b/+io/+config/+internal/computeChunkSizeFromConfig.m
@@ -0,0 +1,120 @@
+function chunkSize = computeChunkSizeFromConfig(A, datasetConfig)
+% computeChunkSizeFromConfig - Compute the chunk size for a dataset using the provided configuration.
+%
+% This function determines the chunk size for a dataset based on the chunk
+% dimensions provided in the datasetConfig structure. Each dimension is
+% resolved according to the following rules: 'max' uses the full dataset
+% size, a fixed number is used as-is, and 'flex' sizes the dimension so
+% that the chunk approximates the target chunk size in bytes.
+%
+% Inputs:
+%   A - A numeric dataset whose chunk size is to be computed.
+%   datasetConfig (1,1) struct - Struct defining chunk dimensions and target chunk size.
+%
+% Output:
+%   chunkSize - A vector specifying the chunk size for each dimension.
+
+    arguments
+        A {mustBeNumeric}
+        datasetConfig (1,1) struct
+    end
+
+    assert(isfield(datasetConfig, 'chunk_dimensions'), ...
+        'Expected datasetConfig to have field "chunk_dimensions"')
+    assert(isfield(datasetConfig, 'target_chunk_size'), ...
+        'Expected datasetConfig to have field "target_chunk_size"')
+
+    % Get dataset size. Flip dimension order: chunk dimensions are specified
+    % in HDF5 (row-major) order, the reverse of MATLAB's column-major order.
+    dataSize = size(A);
+    dataSize = fliplr(dataSize);
+    numDimensions = numel(dataSize);
+
+    % Extract chunk dimensions configuration
+    chunkDimensions = datasetConfig.chunk_dimensions;
+    if ~iscell(chunkDimensions)
+        if isscalar(chunkDimensions)
+            chunkDimensions = {chunkDimensions};
+        else
+            error('NWB:UnexpectedError', 'Unexpected chunk_dimensions format.');
+        end
+    end
+
+    % Find the chunk dimensions specification matching the number of
+    % dimensions of the input array A
+    numChunkDimensions = cellfun(@numel, chunkDimensions);
+    if any(ismember(numChunkDimensions, numDimensions))
+        chunkDimensions = chunkDimensions{numChunkDimensions == numDimensions};
+    elseif all(numDimensions > numChunkDimensions)
+        chunkDimensions = chunkDimensions{end};
+    else
+        error('NWB:UnexpectedError', 'Unexpected chunk dimension size.')
+    end
+
+    if ~iscell(chunkDimensions)
+        chunkDimensions = num2cell(chunkDimensions);
+    end
+
+    defaultChunkSize = datasetConfig.target_chunk_size.value; % in bytes
+    dataByteSize = io.config.internal.getDataByteSize(A);
+    elementSize = dataByteSize / numel(A); % bytes per element
+
+    % Determine the target number of elements per chunk.
+    targetNumElements = defaultChunkSize / elementSize;
+
+    % Initialize chunk size array
+    chunkSize = zeros(1, numDimensions);
+    flexDims = false(1, numDimensions);
+
+    assert(iscell(chunkDimensions), ...
+        'Expected chunk dimensions specification to be a cell array')
+
+    % A JSON null in the chunk_dimensions specification is decoded by
+    % jsondecode as [] (or NaN inside numeric arrays) and is treated
+    % the same as 'flex'.
+    isFlex = @(x) (ischar(x) && strcmp(x, 'flex')) || isempty(x) || ...
+        (isnumeric(x) && isscalar(x) && isnan(x));
+    isMax = @(x) ischar(x) && strcmp(x, 'max');
+
+    % Calculate chunk size for each dimension
+    for dim = 1:numDimensions
+        if dim > numel(chunkDimensions)
+            % Use full size for dimensions beyond the specification
+            chunkSize(dim) = dataSize(dim);
+        else
+            dimSpec = chunkDimensions{dim};
+            if isFlex(dimSpec)
+                flexDims(dim) = true;
+                % Leave chunkSize(dim) to be determined.
+            elseif isnumeric(dimSpec)
+                chunkSize(dim) = dimSpec;
+            elseif isMax(dimSpec)
+                chunkSize(dim) = dataSize(dim);
+            else
+                error('NWB:UnexpectedError', ...
+                    'Invalid chunk specification for dimension %d.', dim);
+            end
+        end
+    end
+
+    % Compute the product of fixed dimensions (number of elements per chunk).
+    if any(~flexDims)
+        fixedProduct = prod(chunkSize(~flexDims));
+    else
+        fixedProduct = 1;
+    end
+
+    % For flex dimensions, compute the remaining number of elements
+    % and split it evenly (in log space) across the flex dimensions.
+    nFlex = sum(flexDims);
+    if nFlex > 0
+        remainingElements = targetNumElements / fixedProduct;
+        % Ensure remainingElements is at least 1.
+        remainingElements = max(remainingElements, 1);
+        % Compute an equal allocation factor for each flex dimension.
+        elementsPerFlexDimension = nthroot(remainingElements, nFlex);
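+        % Illustrative example (assumed values): with 8-byte elements and a
+        % 1e6-byte target, a chunk holds ~125000 elements; if the fixed
+        % dimensions contribute a product of 50, the remaining factor of
+        % 2500 is split over two flex dimensions as nthroot(2500, 2) = 50
+        % elements each.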
+        % Assign computed chunk size for each flex dimension.
+        for dim = find(flexDims)
+            proposedSize = max(1, round(elementsPerFlexDimension));
+            % Do not exceed the full dimension size.
+            chunkSize(dim) = min(proposedSize, dataSize(dim));
+        end
+    end
+
+    % Clamp the chunk size to the dataset dimensions (both are still in
+    % flipped, HDF5 order), then flip back to MATLAB dimension order.
+    chunkSize = min(chunkSize, dataSize);
+    chunkSize = fliplr(chunkSize);
+end
diff --git a/+io/+config/+internal/configureDataPipeFromData.m b/+io/+config/+internal/configureDataPipeFromData.m
new file mode 100644
index 00000000..d4b8eff9
--- /dev/null
+++ b/+io/+config/+internal/configureDataPipeFromData.m
@@ -0,0 +1,42 @@
+function dataPipe = configureDataPipeFromData(numericData, datasetConfig)
+% configureDataPipeFromData - Configure a DataPipe from numeric data and dataset configuration
+
+    import io.config.internal.computeChunkSizeFromConfig
+    import types.untyped.datapipe.properties.DynamicFilter
+
+    chunkSize = computeChunkSizeFromConfig(numericData, datasetConfig);
+    maxSize = size(numericData);
+
+    dataPipeArgs = {...
+        "data", numericData, ...
+        "maxSize", maxSize, ...
+        "chunkSize", chunkSize };
+
+    hasShuffle = ~isempty(datasetConfig.compression.prefilters) ...
+        && any(contains(datasetConfig.compression.prefilters, 'shuffle'));
+
+    if strcmpi(datasetConfig.compression.algorithm, "Deflate")
+        % Use standard compression filters
+        dataPipeArgs = [ dataPipeArgs, ...
+            {'hasShuffle', hasShuffle, ...
+            'compressionLevel', datasetConfig.compression.level} ...
+            ];
+    else
+        % Create property list of custom filters for dataset creation
+        compressionFilter = DynamicFilter( ...
+            datasetConfig.compression.algorithm, ...
+            datasetConfig.compression.level );
+
+        if hasShuffle
+            shuffleFilter = types.untyped.datapipe.properties.Shuffle();
+            filters = [shuffleFilter compressionFilter];
+        else
+            filters = compressionFilter;
+        end
+        dataPipeArgs = [ dataPipeArgs, ...
+            {'filters', filters} ];
+    end
+
+    % Create the datapipe.
+    dataPipe = types.untyped.DataPipe( dataPipeArgs{:} );
+end
diff --git a/+io/+config/+internal/getDataByteSize.m b/+io/+config/+internal/getDataByteSize.m
new file mode 100644
index 00000000..b24a6617
--- /dev/null
+++ b/+io/+config/+internal/getDataByteSize.m
@@ -0,0 +1,7 @@
+function byteSize = getDataByteSize(data)
+% getDataByteSize - Get the byte size of a numeric array
+    dataType = class(data);
+    bytesPerDataPoint = io.getMatTypeSize(dataType);
+
+    byteSize = numel(data) .* bytesPerDataPoint;
+end
diff --git a/+io/+config/+internal/reconfigureDataPipe.m b/+io/+config/+internal/reconfigureDataPipe.m
new file mode 100644
index 00000000..3c68a046
--- /dev/null
+++ b/+io/+config/+internal/reconfigureDataPipe.m
@@ -0,0 +1,3 @@
+function dataPipe = reconfigureDataPipe(dataPipe, datasetConfig)
+    % todo
+end
diff --git a/+io/+config/+internal/resolveDatasetConfigForDataType.m b/+io/+config/+internal/resolveDatasetConfigForDataType.m
new file mode 100644
index 00000000..b058519d
--- /dev/null
+++ b/+io/+config/+internal/resolveDatasetConfigForDataType.m
@@ -0,0 +1,82 @@
+function resolvedOptions = resolveDatasetConfigForDataType(datasetConfig, nwbObject, datasetName)
+% resolveDatasetConfigForDataType - Resolve the dataset configuration for individual neurodata types
+%
+% This function resolves the dataset configuration options for a given NWB
+% object by traversing the object's type hierarchy and merging options from
+% the base type to the most specific type, as defined in the datasetConfig
+% structure, so that options for more specific types take precedence.
+%
+% Input:
+%   datasetConfig (struct): A struct representation of the dataset configuration JSON.
+%   nwbObject (types.untyped.MetaClass): An NWB object whose dataset configuration will be resolved.
+%   datasetName (string): Name of the dataset of the given NWB object to resolve options for.
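+%
+% Example (illustrative): resolving the "data" dataset of a
+% types.core.ElectricalSeries starts from datasetConfig.Default, then merges
+% in "data" options defined for ancestor types (e.g. TimeSeries), and finally
+% those defined for ElectricalSeries itself.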
+%
+% Output:
+%   resolvedOptions (struct): A struct containing the resolved dataset configuration options.
+
+    arguments
+        datasetConfig (1,1) struct
+        nwbObject (1,1) types.untyped.MetaClass
+        datasetName (1,1) string
+    end
+
+    % Initialize resolvedOptions with default options.
+    resolvedOptions = datasetConfig.Default;
+
+    % Get the NWB object type hierarchy (from most specific to base type)
+    typeHierarchy = getTypeHierarchy(nwbObject);
+
+    % Traverse the type hierarchy (in base-to-specific order) to resolve options
+    for i = numel(typeHierarchy):-1:1
+        typeName = typeHierarchy{i};
+
+        % Check if the neurodata type has a datasetConfig
+        if isfield(datasetConfig, typeName)
+            typeOptions = datasetConfig.(typeName);
+
+            % Is datasetName part of typeOptions?
+            if isfield(typeOptions, datasetName)
+                % Merge options into resolvedOptions
+                datasetOptions = typeOptions.(datasetName);
+                resolvedOptions = mergeStructs(resolvedOptions, datasetOptions);
+            end
+        end
+    end
+end
+
+function typeHierarchy = getTypeHierarchy(nwbObject)
+% getTypeHierarchy - Retrieve the type hierarchy of an NWB object.
+% This function returns a cell array of type names, starting from the specific
+% type of the given NWB object up to its base type.
+
+    typeHierarchy = {}; % Initialize an empty cell array
+    currentType = class(nwbObject); % Start with the specific type
+
+    while ~isempty(currentType)
+        shortClassName = regexp(currentType, '[^.]+$', 'match', 'once');
+        typeHierarchy{end+1} = shortClassName; %#ok
+
+        % Use metaclass information to get the parent type
+        metaClass = meta.class.fromName(currentType);
+        if isempty(metaClass.SuperclassList)
+            break; % Reached the base type
+        end
+        currentType = metaClass.SuperclassList(1).Name;
+    end
+end
+
+function merged = mergeStructs(baseStruct, newStruct)
+% mergeStructs - Merge two structs, with fields in newStruct overriding those in baseStruct.
+
+    merged = baseStruct; % Start with the base struct
+
+    fields = fieldnames(newStruct);
+    for i = 1:numel(fields)
+        field = fields{i};
+        if isstruct(newStruct.(field)) && isfield(baseStruct, field) && isstruct(baseStruct.(field))
+            % Recursively merge if both fields are structs
+            merged.(field) = mergeStructs(baseStruct.(field), newStruct.(field));
+        else
+            % Otherwise, override the field
+            merged.(field) = newStruct.(field);
+        end
+    end
+end
diff --git a/+io/+config/applyDatasetConfiguration.m b/+io/+config/applyDatasetConfiguration.m
new file mode 100644
index 00000000..cbba8193
--- /dev/null
+++ b/+io/+config/applyDatasetConfiguration.m
@@ -0,0 +1,84 @@
+function applyDatasetConfiguration(nwbObject, datasetConfiguration, options)
+% applyDatasetConfiguration - Apply dataset configuration to datasets of an NWB object
+
+    arguments
+        nwbObject (1,1) NwbFile
+        datasetConfiguration (1,1) struct = io.config.readDatasetConfiguration()
+        options.OverrideExisting (1,1) logical = false
+    end
+
+    import io.config.internal.resolveDatasetConfigForDataType
+
+    neurodataObjects = getNeurodataObjectsFromNwbFile(nwbObject);
+
+    for iNeurodataObject = 1:numel(neurodataObjects)
+        thisNeurodataObject = neurodataObjects{iNeurodataObject};
+        thisNeurodataClassName = class(thisNeurodataObject);
+
+        % A dataset can be defined on multiple levels of the class hierarchy,
+        % so we need to keep track of which datasets have been processed.
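+        % For example, "data" is defined on both ElectricalSeries and its
+        % ancestor TimeSeries, but should only be configured once, for the
+        % most specific type.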
+        processedDatasets = string.empty;
+
+        isFinished = false;
+        while ~isFinished % Iterate over the type and its ancestor types (superclasses)
+
+            datasetNames = schemes.listDatasetsOfNeurodataType( thisNeurodataClassName );
+            for thisDatasetName = datasetNames % Iterate over all datasets of a type
+
+                if ismember(thisDatasetName, processedDatasets)
+                    continue
+                end
+
+                datasetConfig = resolveDatasetConfigForDataType(...
+                    datasetConfiguration, ...
+                    thisNeurodataObject, ...
+                    thisDatasetName);
+
+                datasetData = thisNeurodataObject.(thisDatasetName);
+
+                if isnumeric(datasetData)
+                    % Create a datapipe object for a numeric dataset value.
+                    dataByteSize = io.config.internal.getDataByteSize(datasetData);
+                    if dataByteSize > datasetConfig.target_chunk_size.value
+                        dataPipe = io.config.internal.configureDataPipeFromData(datasetData, datasetConfig);
+                    end
+                elseif isa(datasetData, 'types.untyped.DataPipe')
+                    if options.OverrideExisting
+                        dataPipe = io.config.internal.reconfigureDataPipe(datasetData, datasetConfig);
+                    end
+                elseif isa(datasetData, 'types.untyped.DataStub')
+                    % todo
+                    % error('Not implemented for files obtained by nwbRead')
+                else
+                    % Other value types are currently left unmodified.
+                end
+
+                if exist('dataPipe', 'var')
+                    thisNeurodataObject.(thisDatasetName) = dataPipe;
+                    processedDatasets = [processedDatasets, thisDatasetName]; %#ok
+                    clear dataPipe
+                end
+            end
+
+            parentType = matnwb.common.getParentType(thisNeurodataClassName);
+
+            if isempty(parentType)
+                isFinished = true;
+            else
+                thisNeurodataClassName = parentType;
+            end
+        end
+    end
+end
+
+function neurodataObjects = getNeurodataObjectsFromNwbFile(nwbObject)
+% getNeurodataObjectsFromNwbFile - Return all neurodata objects in a NwbFile object
+
+    objectMap = nwbObject.searchFor('types.');
+
+    neurodataObjects = objectMap.values();
+    neurodataClassNames = cellfun(@(c) class(c), neurodataObjects, 'uni', 0);
+
+    toIgnore = startsWith(neurodataClassNames, "types.untyped");
+    neurodataObjects(toIgnore) = [];
+end
diff --git a/+io/+config/readDatasetConfiguration.m b/+io/+config/readDatasetConfiguration.m
new file mode 100644
index 00000000..38c44ead
--- /dev/null
+++ b/+io/+config/readDatasetConfiguration.m
@@ -0,0 +1,39 @@
+function datasetConfig = readDatasetConfiguration(profile)
+% READDATASETCONFIGURATION Reads the default dataset configuration from a JSON file.
+%
+% Syntax:
+%   configObject = io.config.READDATASETCONFIGURATION() loads the default
+%   dataset configuration parameters from a JSON file located in the
+%   "configuration" folder in the MatNWB root directory.
+%
+%   configObject = io.config.READDATASETCONFIGURATION(profile) loads the
+%   dataset configuration parameters for the specified "configuration profile"
+%   from a JSON file located in the "configuration" folder in the MatNWB root
+%   directory.
+%
+% Output Arguments:
+%   - datasetConfig - A MATLAB structure containing the dataset configuration
+%     parameters (chunking & compression) defined in the JSON
+%     configuration file.
+%
+% Example 1 - Load default dataset configurations::
+%
+%   % Load the default dataset configuration
+%   datasetConfig = io.config.readDatasetConfiguration();
+%   disp(datasetConfig);
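+%
+% Example 2 - Load dataset configurations for a specific profile::
+%
+%   % Load the dataset configuration optimized for cloud access
+%   datasetConfig = io.config.readDatasetConfiguration("cloud");
+%   disp(datasetConfig);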
+ "archive" + ])} = "default" + end + + filename = sprintf('%s_dataset_configuration.json', profile); + + configFilePath = fullfile(misc.getMatnwbDir, 'configuration', filename); + datasetConfig = jsondecode(fileread(configFilePath)); + + datasetConfig = io.config.internal.applyCustomMatNWBPropertyNames(datasetConfig); +end diff --git a/+matnwb/+common/composeFullClassName.m b/+matnwb/+common/composeFullClassName.m new file mode 100644 index 00000000..1c08a09a --- /dev/null +++ b/+matnwb/+common/composeFullClassName.m @@ -0,0 +1,9 @@ +function fullClassName = composeFullClassName(namespaceName, neurodataType) + arguments + namespaceName (:, 1) string + neurodataType (:, 1) string + end + + fullClassName = compose("types.%s.%s", namespaceName, neurodataType); + fullClassName = transpose(fullClassName); % Return as row vector +end diff --git a/+matnwb/+common/getParentType.m b/+matnwb/+common/getParentType.m new file mode 100644 index 00000000..ba087ab5 --- /dev/null +++ b/+matnwb/+common/getParentType.m @@ -0,0 +1,7 @@ +function parentTypeClassName = getParentType(typeClassName) + mc = meta.class.fromName(typeClassName); + parentTypeClassName = mc.SuperclassList(1).Name; + if strcmp(parentTypeClassName, "types.untyped.MetaClass") + parentTypeClassName = string.empty; + end +end diff --git a/+schemes/listDatasetsOfNeurodataType.m b/+schemes/listDatasetsOfNeurodataType.m new file mode 100644 index 00000000..216f5045 --- /dev/null +++ b/+schemes/listDatasetsOfNeurodataType.m @@ -0,0 +1,55 @@ +function datasetNames = listDatasetsOfNeurodataType(typeClassName) +% listDatasetsOfNeurodataType - List names of datasets of a neurodata type +% +% Input Arguments: +% - typeClassName (string) - +% Full MatNWB class name for a neurodata type, i.e "types.core.TimeSeries" +% +% Output Arguments: +% - datasetNames (string) - +% Names of datasets contained in the specified neurodata type + + arguments + typeClassName (1,1) string + end + + classNameSplit = string( split(typeClassName, '.') ); + typesIdx = find(classNameSplit == "types"); + + assert(~isempty(typesIdx), 'Expected class name to contain "types"') + namespaceName = classNameSplit(typesIdx+1); + namespaceName = strrep(namespaceName, '_', '-'); + namespace = schemes.loadNamespace(namespaceName, misc.getMatnwbDir); + + neurodataTypeName = classNameSplit(typesIdx+2); + typeScheme = namespace.registry(neurodataTypeName); + + switch typeScheme('class_type') + case 'groups' + if isKey(typeScheme, 'datasets') + datasetMaps = typeScheme('datasets'); + + datasetNames = repmat("", size(datasetMaps)); + for i = 1:numel(datasetMaps) + if isKey(datasetMaps{i}, 'name') + datasetNames(i) = datasetMaps{i}('name'); + elseif isKey(datasetMaps{i}, 'data_type_inc') + datasetNames(i) = lower( datasetMaps{i}('data_type_inc') ); + elseif isKey(datasetMaps{i}, 'data_type_def') + datasetNames(i) = lower( datasetMaps{i}('data_type_def') ); + else + keyboard + error('NWB:UnexpectedError', 'Something unexpected happened.') + end + end + datasetNames(datasetNames=="") = []; + else + datasetNames = string.empty; + end + + case 'datasets' + datasetNames = "data"; + otherwise + error('Unexpected class type') + end +end diff --git a/+tests/+unit/+io/+config/DatasetConfigurationTest.m b/+tests/+unit/+io/+config/DatasetConfigurationTest.m new file mode 100644 index 00000000..c285a36a --- /dev/null +++ b/+tests/+unit/+io/+config/DatasetConfigurationTest.m @@ -0,0 +1,132 @@ +classdef DatasetConfigurationTest < matlab.unittest.TestCase +% Tests for io.config.applyDatasetConfiguration 
+
+    arguments
+        typeClassName (1,1) string
+    end
+
+    classNameSplit = string( split(typeClassName, '.') );
+    typesIdx = find(classNameSplit == "types");
+
+    assert(~isempty(typesIdx), 'Expected class name to contain "types"')
+    namespaceName = classNameSplit(typesIdx+1);
+    namespaceName = strrep(namespaceName, '_', '-');
+    namespace = schemes.loadNamespace(namespaceName, misc.getMatnwbDir);
+
+    neurodataTypeName = classNameSplit(typesIdx+2);
+    typeScheme = namespace.registry(neurodataTypeName);
+
+    switch typeScheme('class_type')
+        case 'groups'
+            if isKey(typeScheme, 'datasets')
+                datasetMaps = typeScheme('datasets');
+
+                datasetNames = repmat("", size(datasetMaps));
+                for i = 1:numel(datasetMaps)
+                    if isKey(datasetMaps{i}, 'name')
+                        datasetNames(i) = datasetMaps{i}('name');
+                    elseif isKey(datasetMaps{i}, 'data_type_inc')
+                        datasetNames(i) = lower( datasetMaps{i}('data_type_inc') );
+                    elseif isKey(datasetMaps{i}, 'data_type_def')
+                        datasetNames(i) = lower( datasetMaps{i}('data_type_def') );
+                    else
+                        error('NWB:UnexpectedError', ...
+                            'Expected dataset specification to have a name or a type.')
+                    end
+                end
+                datasetNames(datasetNames=="") = [];
+                datasetNames = reshape(datasetNames, 1, []); % row vector for iteration
+            else
+                datasetNames = string.empty;
+            end
+
+        case 'datasets'
+            datasetNames = "data";
+        otherwise
+            error('NWB:UnexpectedError', 'Unexpected class type')
+    end
+end
diff --git a/+tests/+unit/+io/+config/DatasetConfigurationTest.m b/+tests/+unit/+io/+config/DatasetConfigurationTest.m
new file mode 100644
index 00000000..c285a36a
--- /dev/null
+++ b/+tests/+unit/+io/+config/DatasetConfigurationTest.m
@@ -0,0 +1,132 @@
+classdef DatasetConfigurationTest < matlab.unittest.TestCase
+% Tests for the io.config.applyDatasetConfiguration function
+
+    properties
+        DefaultConfig
+    end
+
+    methods(TestMethodSetup)
+        function setup(testCase)
+            % Setup default configuration before each test
+            testCase.DefaultConfig = io.config.readDatasetConfiguration();
+        end
+    end
+
+    methods(Test)
+        function testBasicFunctionality(testCase)
+            % Test basic functionality with default configuration
+            nwbFile = NwbFile( ...
+                'identifier', 'TEST123', ...
+                'session_description', 'test session', ...
+                'session_start_time', datetime());
+
+            % Should not throw any errors
+            io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig);
+        end
+
+        function testNumericDatasetConfiguration(testCase)
+            % Test configuration of numeric datasets
+            nwbFile = NwbFile( ...
+                'identifier', 'TEST123', ...
+                'session_description', 'test session', ...
+                'session_start_time', datetime());
+
+            % Create a large numeric dataset
+            data = types.core.TimeSeries( ...
+                'data', rand(1000, 1000), ...
+                'data_unit', 'n/a', ...
+                'timestamps', 1:1000);
+
+            nwbFile.acquisition.set('test_data', data);
+
+            % Apply configuration
+            io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig);
+
+            % Verify the dataset was converted to DataPipe
+            testCase.verifyTrue(isa(nwbFile.acquisition.get('test_data').data, ...
+                'types.untyped.DataPipe'), ...
+                'Large numeric dataset should be converted to DataPipe');
+        end
+
+        function testSmallNumericDataset(testCase)
+            % Test that small numeric datasets remain unchanged
+            nwbFile = NwbFile( ...
+                'identifier', 'TEST123', ...
+                'session_description', 'test session', ...
+                'session_start_time', datetime());
+
+            % Create a small numeric dataset
+            data = types.core.TimeSeries( ...
+                'data', rand(10, 10), ...
+                'data_unit', 'n/a', ...
+                'timestamps', 1:10);
+
+            nwbFile.acquisition.set('test_data', data);
+
+            % Apply configuration
+            io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig);
+
+            % Verify the dataset remains numeric
+            testCase.verifyTrue(isnumeric(nwbFile.acquisition.get('test_data').data), ...
+                'Small numeric dataset should remain numeric');
+        end
+
+        function testOverrideExisting(testCase)
+            % Test override behavior for existing DataPipe objects
+            nwbFile = NwbFile( ...
+                'identifier', 'TEST123', ...
+                'session_description', 'test session', ...
+                'session_start_time', datetime());
+
+            % Create a DataPipe object
+            rawData = rand(1000, 1000);
+            dataPipe = types.untyped.DataPipe('data', rawData, 'axis', 1, 'chunkSize', 100);
+
+            data = types.core.TimeSeries( ...
+                'data', dataPipe, ...
+                'data_unit', 'n/a', ...
+                'timestamps', 1:1000);
+
+            nwbFile.acquisition.set('test_data', data);
+
+            % Apply configuration with override
+            io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig, ...
+                'OverrideExisting', true);
+
+            % Verify the DataPipe was reconfigured
+            resultPipe = nwbFile.acquisition.get('test_data').data;
+            testCase.verifyTrue(isa(resultPipe, 'types.untyped.DataPipe'), ...
+                'Result should still be a DataPipe');
+        end
+
+        function testNoOverrideExisting(testCase)
+            % Test that existing DataPipe objects are not modified without override
+            nwbFile = NwbFile( ...
+                'identifier', 'TEST123', ...
+                'session_description', 'test session', ...
+                'session_start_time', datetime());
+
+            % Create a DataPipe object with specific configuration
+            rawData = rand(1000, 1000);
+            originalChunkSize = [100, 100];
+            dataPipe = types.untyped.DataPipe('data', rawData, 'axis', 1, ...
+                'chunkSize', originalChunkSize);
+
+            data = types.core.TimeSeries( ...
+                'data', dataPipe, ...
+                'data_unit', 'n/a', ...
+                'timestamps', 1:1000);
+
+            nwbFile.acquisition.set('test_data', data);
+
+            % Apply configuration without override
+            io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig, ...
+                'OverrideExisting', false);
+
+            % Verify the DataPipe configuration remains unchanged
+            resultPipe = nwbFile.acquisition.get('test_data').data;
+            testCase.verifyEqual(resultPipe.chunkSize, originalChunkSize, ...
+                'DataPipe configuration should remain unchanged without override');
+        end
+    end
+end
diff --git a/configuration/archive_dataset_configuration.json b/configuration/archive_dataset_configuration.json
new file mode 100644
index 00000000..0b26f018
--- /dev/null
+++ b/configuration/archive_dataset_configuration.json
@@ -0,0 +1,21 @@
+{
+    "Default": {
+        "layout": "chunked",
+        "target_chunk_size": {
+            "value": 100000000,
+            "unit": "bytes"
+        },
+        "chunk_dimensions": [
+            [null],
+            [null, "max"],
+            [null, "max", "max"],
+            [null, "max", "max", "max"]
+        ],
+        "compression": {
+            "algorithm": "ZStandard",
+            "level": 5,
+            "parameters": {},
+            "prefilters": ["shuffle"]
+        }
+    }
+}
\ No newline at end of file
diff --git a/configuration/cloud_dataset_configuration.json b/configuration/cloud_dataset_configuration.json
new file mode 100644
index 00000000..d162cd8c
--- /dev/null
+++ b/configuration/cloud_dataset_configuration.json
@@ -0,0 +1,54 @@
+{
+    "Default": {
+        "layout": "chunked",
+        "target_chunk_size": {
+            "value": 10000000,
+            "unit": "bytes"
+        },
+        "chunk_dimensions": [
+            ["flex"],
+            ["flex", "max"],
+            ["flex", "max", "max"],
+            ["flex", "max", "max", "max"]
+        ],
+        "compression": {
+            "algorithm": "deflate",
+            "level": 3,
+            "parameters": {},
+            "prefilters": ["shuffle"]
+        }
+    },
+    "VectorData": {
+        "compression": {
+            "algorithm": "deflate",
+            "level": 7
+        }
+    },
+    "TimeSeries": {
+        "data": {
+            "chunk_dimensions": [["flex", 32], ["flex", 32, "max"]],
+            "compression": {
+                "algorithm": "deflate",
+                "level": 4
+            }
+        },
+        "timestamps": {
+            "chunk_dimensions": ["flex"]
+        }
+    },
+    "ImageSeries": {
+        "data": {
+            "chunk_dimensions": [["flex", "max", "max"], ["flex", "max", "max", "max"]]
+        }
+    },
+    "ElectricalSeries": {
+        "data": {
+            "chunk_dimensions": [["flex", 64], ["flex", 64, "max"]]
+        }
+    },
+    "SpikeEventSeries": {
+        "data": {
+            "chunk_dimensions": [1000]
+        }
+    }
+}
diff --git a/configuration/default_dataset_configuration.json b/configuration/default_dataset_configuration.json
new file mode 100644
index 00000000..df12d3e7
--- /dev/null
+++ b/configuration/default_dataset_configuration.json
@@ -0,0 +1,21 @@
+{
+    "Default": {
+        "layout": "chunked",
+        "target_chunk_size": {
+            "value": 1000000,
+            "unit": "bytes"
+        },
+        "chunk_dimensions": [
+            [null],
+            [null, "max"],
+            [null, "max", "max"],
+            [null, "max", "max", "max"]
+        ],
+        "compression": {
+            "algorithm": "deflate",
+            "level": 3,
+            "parameters": {},
+            "prefilters": []
+        }
+    }
+}
\ No newline at end of file
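A minimal end-to-end sketch of the workflow introduced by this change (assumes
MatNWB is on the MATLAB path; file and object names are illustrative):

    % Build a file with one large acquisition series
    nwbFile = NwbFile( ...
        'identifier', 'EXAMPLE', ...
        'session_description', 'example session', ...
        'session_start_time', datetime());
    nwbFile.acquisition.set('ts', types.core.TimeSeries( ...
        'data', rand(1000, 1000), ...
        'data_unit', 'n/a', ...
        'timestamps', 1:1000));

    % Read a configuration profile and apply it; large numeric datasets are
    % wrapped in types.untyped.DataPipe objects with the configured chunking
    % and compression before export.
    datasetConfig = io.config.readDatasetConfiguration("cloud");
    io.config.applyDatasetConfiguration(nwbFile, datasetConfig);
    nwbExport(nwbFile, 'example.nwb');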