From 5f48a4adf97c70eddd680f281f71317b3915c2de Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 23 Nov 2024 10:43:56 +0100 Subject: [PATCH 01/19] Add initial chunk parameters and function to read it from file --- +io/+config/readDefaultChunkConfiguration.m | 19 ++++++++++++++++ configuration/chunk_params.json | 25 +++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 +io/+config/readDefaultChunkConfiguration.m create mode 100644 configuration/chunk_params.json diff --git a/+io/+config/readDefaultChunkConfiguration.m b/+io/+config/readDefaultChunkConfiguration.m new file mode 100644 index 00000000..46365403 --- /dev/null +++ b/+io/+config/readDefaultChunkConfiguration.m @@ -0,0 +1,19 @@ +function configObject = readDefaultChunkConfiguration() +% READDEFAULTCHUNKCONFIGURATION Reads the default chunking configuration from a JSON file. +% +% configObject = READDEFAULTCHUNKCONFIGURATION() loads the default chunking +% parameters from a JSON configuration file located in the 'configuration' +% directory within the MatNWB directory. +% +% Output: +% configObject - A MATLAB structure containing the chunking parameters +% defined in the JSON configuration file. +% +% Example: +% % Load the default chunk configuration +% config = readDefaultChunkConfiguration(); +% disp(config); + + configFilePath = fullfile(misc.getMatnwbDir, 'configuration', 'chunk_params.json'); + configObject = jsondecode(fileread(configFilePath)); +end diff --git a/configuration/chunk_params.json b/configuration/chunk_params.json new file mode 100644 index 00000000..f9f2e3c8 --- /dev/null +++ b/configuration/chunk_params.json @@ -0,0 +1,25 @@ +{ + "NWBContainer": { + "chunk_compression": "gzip", + "chunk_compression_args": 4, + "chunk_default_size": 10000000.0, + "chunk_default_size_unit": "bytes" + }, + "ElectricalSeries": { + "data": { + "chunk_dimensions": [ + null, + 32 + ] + } + }, + "ImageSeries": { + "data": { + "chunk_dimensions": [ + null, + "max", + "max" + ] + } + } +} From 4822417601ba34f95442a921e6925c28ae96d9fc Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 23 Nov 2024 12:38:52 +0100 Subject: [PATCH 02/19] First draft of applying chunk configurations --- .../+internal/computeChunkSizeFromConfig.m | 62 +++++++++++++++ +io/+config/+internal/getDataByteSize.m | 7 ++ .../+internal/resolveDataTypeChunkConfig.m | 76 +++++++++++++++++++ +io/+config/applyChunkConfiguration.m | 49 ++++++++++++ +io/+config/readDefaultChunkConfiguration.m | 2 +- configuration/chunk_params.json | 26 ++++++- 6 files changed, 220 insertions(+), 2 deletions(-) create mode 100644 +io/+config/+internal/computeChunkSizeFromConfig.m create mode 100644 +io/+config/+internal/getDataByteSize.m create mode 100644 +io/+config/+internal/resolveDataTypeChunkConfig.m create mode 100644 +io/+config/applyChunkConfiguration.m diff --git a/+io/+config/+internal/computeChunkSizeFromConfig.m b/+io/+config/+internal/computeChunkSizeFromConfig.m new file mode 100644 index 00000000..14f0d7d7 --- /dev/null +++ b/+io/+config/+internal/computeChunkSizeFromConfig.m @@ -0,0 +1,62 @@ +function chunkSize = computeChunkSizeFromConfig(A, chunkSpecification) +% computeChunkSizeFromConfig - Compute the chunk size for a dataset using the provided specification. +% This function determines the chunk size for a dataset based on the chunk +% dimensions provided in the chunkSpecification. 
It adjusts dimensions according +% to rules: 'max' uses the dataset size, fixed numbers use their value, and 'null' +% calculates the dimension size to approximate the target chunk size in bytes. +% +% Inputs: +% A - A numeric dataset whose chunk size is to be computed. +% chunkSpecification (1,1) struct - Struct defining chunk dimensions and settings. +% +% Output: +% chunkSize - A vector specifying the chunk size for each dimension. + + arguments + A {mustBeNumeric} + chunkSpecification (1,1) struct + end + + % Get dataset size + dataSize = size(A); + dataSize = fliplr(dataSize); % matnwb quirk + numDimensions = numel(dataSize); + + % Extract relevant configuration parameters + chunkDimensions = squeeze(chunkSpecification.data.chunk_dimensions); + defaultChunkSize = chunkSpecification.chunk_default_size; % in bytes + dataByteSize = io.config.internal.getDataByteSize(A); + + % Initialize chunk size array + chunkSize = zeros(1, numDimensions); + + % Calculate chunk size for each dimension + for dim = 1:numDimensions + if dim > numel(chunkDimensions) + % Use full size for dimensions beyond the specification + chunkSize(dim) = dataSize(dim); + else + dimSpec = chunkDimensions{dim}; + if isempty(dimSpec) + % Compute chunk size for 'null' dimensions + % Estimate proportional size based on remaining chunk size + remainingChunkSize = defaultChunkSize / dataByteSize; % scale factor for all dimensions + nullDimensions = find(cellfun(@isempty, chunkDimensions)); + proportionalSize = nthroot(remainingChunkSize, numel(nullDimensions)); + chunkSize(dim) = max(1, round(proportionalSize*dataSize(dim))); + elseif isnumeric(dimSpec) + % Fixed chunk size + chunkSize(dim) = dimSpec; + elseif ischar(dimSpec) && strcmp(dimSpec, 'max') + % Use full dimension size + chunkSize(dim) = dataSize(dim); + else + error('Invalid chunk specification for dimension %d.', dim); + end + end + end + + % Ensure chunk size does not exceed dataset dimensions + chunkSize = min(chunkSize, dataSize); + chunkSize = fliplr(chunkSize); +end diff --git a/+io/+config/+internal/getDataByteSize.m b/+io/+config/+internal/getDataByteSize.m new file mode 100644 index 00000000..b24a6617 --- /dev/null +++ b/+io/+config/+internal/getDataByteSize.m @@ -0,0 +1,7 @@ +function byteSize = getDataByteSize(data) +% getDataByteSize - Get bytesize of a numeric array + dataType = class(data); + bytesPerDataPoint = io.getMatTypeSize(dataType); + + byteSize = numel(data) .* bytesPerDataPoint; +end diff --git a/+io/+config/+internal/resolveDataTypeChunkConfig.m b/+io/+config/+internal/resolveDataTypeChunkConfig.m new file mode 100644 index 00000000..f5d8df2e --- /dev/null +++ b/+io/+config/+internal/resolveDataTypeChunkConfig.m @@ -0,0 +1,76 @@ +function resolvedOptions = resolveDataTypeChunkConfig(chunkSpecification, nwbObject) +% resolveDataTypeChunkConfig - Resolve the chunk options for individual datatypes +% This function resolves the chunk configuration options for a given NWB object +% by traversing the object hierarchy and combining options from the most specific +% type to the base type, as defined in the chunkSpecification. +% +% Input: +% chunkSpecification (struct): A struct representation of the chunk configuration JSON. +% nwbObject (types.untyped.MetaClass): An NWB object whose chunk configuration will be resolved. +% +% Output: +% resolvedOptions (struct): A struct containing the resolved chunk configuration options. 
+ + arguments + chunkSpecification (1,1) struct + nwbObject (1,1) types.untyped.MetaClass + end + + % Initialize resolvedOptions with an empty struct + resolvedOptions = struct(); + + % Get the NWB object type hierarchy (from most specific to base type) + typeHierarchy = getTypeHierarchy(nwbObject); + + % Traverse the type hierarchy to resolve options + for i = numel(typeHierarchy):-1:1 + typeName = typeHierarchy{i}; + + % Check if the type has a chunkSpecification + if isfield(chunkSpecification, typeName) + typeOptions = chunkSpecification.(typeName); + + % Merge options into resolvedOptions + resolvedOptions = mergeStructs(resolvedOptions, typeOptions); + end + end +end + +function typeHierarchy = getTypeHierarchy(nwbObject) +% getTypeHierarchy - Retrieve the type hierarchy of an NWB object. +% This function returns a cell array of type names, starting from the specific +% type of the given NWB object up to its base type. + + typeHierarchy = {}; % Initialize an empty cell array + currentType = class(nwbObject); % Start with the specific type + + while ~isempty(currentType) + shortClassName = regexp(currentType, '[^.]+$', 'match', 'once'); + typeHierarchy{end+1} = shortClassName; %#ok + + % Use MetaClass information to get the parent type + metaClass = meta.class.fromName(currentType); + if isempty(metaClass.SuperclassList) + break; % Reached the base type + end + currentType = metaClass.SuperclassList(1).Name; + end +end + +function merged = mergeStructs(baseStruct, newStruct) +% mergeStructs - Merge two structs, with fields in newStruct overriding those in baseStruct. + + merged = baseStruct; % Start with the base struct + + fields = fieldnames(newStruct); + for i = 1:numel(fields) + field = fields{i}; + if isstruct(newStruct.(field)) && isfield(baseStruct, field) && isstruct(baseStruct.(field)) + % Recursively merge if both fields are structs + merged.(field) = mergeStructs(baseStruct.(field), newStruct.(field)); + else + % Otherwise, override the field + merged.(field) = newStruct.(field); + end + end +end diff --git a/+io/+config/applyChunkConfiguration.m b/+io/+config/applyChunkConfiguration.m new file mode 100644 index 00000000..6f59fb45 --- /dev/null +++ b/+io/+config/applyChunkConfiguration.m @@ -0,0 +1,49 @@ +function applyChunkConfiguration(nwbObject, chunkConfiguration) + arguments + nwbObject (1,1) NwbFile + chunkConfiguration (1,1) struct = io.config.readDefaultChunkConfiguration() + end + + objectMap = nwbObject.searchFor(''); + objectKeys = objectMap.keys(); + + filteredObjectMap = containers.Map(); + for i = 1:numel(objectKeys) + thisObjectKey = objectKeys{i}; + thisNwbObject = objectMap(thisObjectKey); + if startsWith(class(thisNwbObject), "types.") && ~startsWith(class(thisNwbObject), "types.untyped") + filteredObjectMap(thisObjectKey) = thisNwbObject; + end + end + clear objectMap + + objectKeys = filteredObjectMap.keys(); + for i = 1:numel(objectKeys) + thisObjectKey = objectKeys{i}; + thisNwbObject = filteredObjectMap(thisObjectKey); + + % Todo: Find dataset properties where it makes sense to do chunking + % I.e data, timestamps etc. Can this be determined automatically, + % or do we need a lookup? + + dataTypeChunkOptions = io.config.internal.resolveDataTypeChunkConfig(chunkConfiguration, thisNwbObject); + + if isprop(thisNwbObject, 'data') + if ~isa(thisNwbObject.data, 'types.untyped.DataPipe') + % Create a datapipe object for the property value. 
+ dataByteSize = io.config.internal.getDataByteSize(thisNwbObject.data); + if dataByteSize > dataTypeChunkOptions.chunk_default_size + chunkSize = io.config.internal.computeChunkSizeFromConfig(thisNwbObject.data, dataTypeChunkOptions); + maxSize = size(thisNwbObject.data); + + dataPipe = types.untyped.DataPipe( ... + 'data', thisNwbObject.data, ... + 'maxSize', maxSize, ... + 'chunkSize', chunkSize, ... + 'compressionLevel', dataTypeChunkOptions.chunk_compression_args); + thisNwbObject.data = dataPipe; + end + end + end + end +end diff --git a/+io/+config/readDefaultChunkConfiguration.m b/+io/+config/readDefaultChunkConfiguration.m index 46365403..1c08c375 100644 --- a/+io/+config/readDefaultChunkConfiguration.m +++ b/+io/+config/readDefaultChunkConfiguration.m @@ -2,7 +2,7 @@ % READDEFAULTCHUNKCONFIGURATION Reads the default chunking configuration from a JSON file. % % configObject = READDEFAULTCHUNKCONFIGURATION() loads the default chunking -% parameters from a JSON configuration file located in the 'configuration' +% parameters from a JSON configuration file located in the 'configuration' % directory within the MatNWB directory. % % Output: diff --git a/configuration/chunk_params.json b/configuration/chunk_params.json index f9f2e3c8..2ce804d9 100644 --- a/configuration/chunk_params.json +++ b/configuration/chunk_params.json @@ -3,7 +3,23 @@ "chunk_compression": "gzip", "chunk_compression_args": 4, "chunk_default_size": 10000000.0, - "chunk_default_size_unit": "bytes" + "chunk_default_size_unit": "bytes", + "data": { + "chunk_dimensions": [ + null + ] + } + }, + "Data": { + "chunk_compression": "gzip", + "chunk_compression_args": 4, + "chunk_default_size": 10000000.0, + "chunk_default_size_unit": "bytes", + "data": { + "chunk_dimensions": [ + null + ] + } }, "ElectricalSeries": { "data": { @@ -13,6 +29,14 @@ ] } }, + "TimeSeries": { + "data": { + "chunk_dimensions": [ + null, + 32 + ] + } + }, "ImageSeries": { "data": { "chunk_dimensions": [ From cfdefd6e4eb4de4540d4edd127486f8d4cb7fdbf Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 23 Nov 2024 12:55:12 +0100 Subject: [PATCH 03/19] Minor fixes --- +io/+config/+internal/computeChunkSizeFromConfig.m | 2 +- +io/+config/applyChunkConfiguration.m | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/+io/+config/+internal/computeChunkSizeFromConfig.m b/+io/+config/+internal/computeChunkSizeFromConfig.m index 14f0d7d7..fdfa5a34 100644 --- a/+io/+config/+internal/computeChunkSizeFromConfig.m +++ b/+io/+config/+internal/computeChunkSizeFromConfig.m @@ -23,7 +23,7 @@ numDimensions = numel(dataSize); % Extract relevant configuration parameters - chunkDimensions = squeeze(chunkSpecification.data.chunk_dimensions); + chunkDimensions = chunkSpecification.data.chunk_dimensions; defaultChunkSize = chunkSpecification.chunk_default_size; % in bytes dataByteSize = io.config.internal.getDataByteSize(A); diff --git a/+io/+config/applyChunkConfiguration.m b/+io/+config/applyChunkConfiguration.m index 6f59fb45..e0d0856e 100644 --- a/+io/+config/applyChunkConfiguration.m +++ b/+io/+config/applyChunkConfiguration.m @@ -29,7 +29,7 @@ function applyChunkConfiguration(nwbObject, chunkConfiguration) dataTypeChunkOptions = io.config.internal.resolveDataTypeChunkConfig(chunkConfiguration, thisNwbObject); if isprop(thisNwbObject, 'data') - if ~isa(thisNwbObject.data, 'types.untyped.DataPipe') + if isnumeric(thisNwbObject.data) % Create a datapipe object for the property value. 
dataByteSize = io.config.internal.getDataByteSize(thisNwbObject.data); if dataByteSize > dataTypeChunkOptions.chunk_default_size From e164ce020a15464e93837196b36f97e116d0af58 Mon Sep 17 00:00:00 2001 From: ehennestad Date: Tue, 21 Jan 2025 11:13:53 +0100 Subject: [PATCH 04/19] Create listDatasetsOfNeurodataType.m --- +schemes/listDatasetsOfNeurodataType.m | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 +schemes/listDatasetsOfNeurodataType.m diff --git a/+schemes/listDatasetsOfNeurodataType.m b/+schemes/listDatasetsOfNeurodataType.m new file mode 100644 index 00000000..8a6ee902 --- /dev/null +++ b/+schemes/listDatasetsOfNeurodataType.m @@ -0,0 +1,32 @@ +function datasetNames = listDatasetsOfNeurodataType(typeClassName) +% listDatasetsOfNeurodataType - List names of datasets of a neurodata type +% +% Input Arguments: +% - typeClassName (string) - +% Full MatNWB class name for a neurodata type, i.e "types.core.TimeSeries" +% +% Output Arguments: +% - datasetNames (string) - +% Names of datasets contained in the specified neurodata type + + arguments + typeClassName (1,1) string + end + + classNameSplit = string( split(typeClassName, '.') ); + typesIdx = find(classNameSplit == "types"); + + assert(~isempty(typesIdx), 'Expected class name to contain "types"') + namespaceName = classNameSplit(typesIdx+1); + namespace = schemes.loadNamespace(namespaceName, misc.getMatnwbDir); + + neurodataTypeName = classNameSplit(typesIdx+2); + typeScheme = namespace.registry(neurodataTypeName); + + datasetMaps = typeScheme('datasets'); + + datasetNames = repmat("", size(datasetMaps)); + for i = 1:numel(datasetMaps) + datasetNames(i) = datasetMaps{i}('name'); + end +end From e5f9bc74a56ce7c3e7f70d37f23b9d3c9fe5a102 Mon Sep 17 00:00:00 2001 From: ehennestad Date: Tue, 21 Jan 2025 17:07:42 +0100 Subject: [PATCH 05/19] Add new template for dataset configuration json --- .../cloud_dataset_configuration.json | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 configuration/cloud_dataset_configuration.json diff --git a/configuration/cloud_dataset_configuration.json b/configuration/cloud_dataset_configuration.json new file mode 100644 index 00000000..b410b2fc --- /dev/null +++ b/configuration/cloud_dataset_configuration.json @@ -0,0 +1,48 @@ +{ + "Default": { + "layout": "chunked", + "target_chunk_size": { + "value": 10000000, + "unit": "bytes" + }, + "chunk_dimensions": [ + [null], + [null, "max"], + [null, "max", "max"], + [null, "max", "max", "max"] + ], + "compression": { + "algorithm": "deflate", + "level": 3, + "parameters": {}, + "prefilters": ["shuffle"] + } + }, + "TimeSeries": { + "data": { + "chunk_dimensions": [[null, 32], [null, 32, "max"]], + "compression": { + "algorithm": "deflate", + "level": 4 + } + }, + "timestamps": { + "chunk_dimensions": [null] + } + }, + "ImageSeries": { + "data": { + "chunk_dimensions": [[null, "max", "max"], [null, "max", "max", "max"]] + } + }, + "ElectricalSeries": { + "data": { + "chunk_dimensions": [[null, 32], [null, 32, "max"]] + } + }, + "SpikeEventSeries": { + "data": { + "chunk_dimensions": [1000] + } + } +} \ No newline at end of file From c7402d896b911ac02b20f7a84d173e3df1b68097 Mon Sep 17 00:00:00 2001 From: ehennestad Date: Tue, 21 Jan 2025 17:09:45 +0100 Subject: [PATCH 06/19] Update applyChunkConfiguration and dependent functions to work with new template --- .../+internal/computeChunkSizeFromConfig.m | 9 +- .../+internal/configureDataPipeFromData.m | 41 ++++++ 
+io/+config/+internal/reconfigureDataPipe.m | 4 + .../+internal/resolveDataTypeChunkConfig.m | 17 ++- +io/+config/applyChunkConfiguration.m | 119 ++++++++++++------ +io/+config/readDefaultChunkConfiguration.m | 25 ++-- +matnwb/+common/getParentType.m | 7 ++ +schemes/listDatasetsOfNeurodataType.m | 26 +++- 8 files changed, 187 insertions(+), 61 deletions(-) create mode 100644 +io/+config/+internal/configureDataPipeFromData.m create mode 100644 +io/+config/+internal/reconfigureDataPipe.m create mode 100644 +matnwb/+common/getParentType.m diff --git a/+io/+config/+internal/computeChunkSizeFromConfig.m b/+io/+config/+internal/computeChunkSizeFromConfig.m index fdfa5a34..f25158dc 100644 --- a/+io/+config/+internal/computeChunkSizeFromConfig.m +++ b/+io/+config/+internal/computeChunkSizeFromConfig.m @@ -23,8 +23,13 @@ numDimensions = numel(dataSize); % Extract relevant configuration parameters - chunkDimensions = chunkSpecification.data.chunk_dimensions; - defaultChunkSize = chunkSpecification.chunk_default_size; % in bytes + chunkDimensions = chunkSpecification.chunk_dimensions; + if iscell(chunkDimensions) + numChunkDimensions = cellfun(@numel, chunkDimensions); + chunkDimensions = chunkDimensions{numChunkDimensions == numDimensions}; + end + + defaultChunkSize = chunkSpecification.target_chunk_size.value; % in bytes dataByteSize = io.config.internal.getDataByteSize(A); % Initialize chunk size array diff --git a/+io/+config/+internal/configureDataPipeFromData.m b/+io/+config/+internal/configureDataPipeFromData.m new file mode 100644 index 00000000..cbc7a46f --- /dev/null +++ b/+io/+config/+internal/configureDataPipeFromData.m @@ -0,0 +1,41 @@ +function dataPipe = configureDataPipeFromData(numericData, datasetConfig) +% configureDataPipeFromData - Configure a DataPipe from numeric data and dataset configuration + + import io.config.internal.computeChunkSizeFromConfig + import types.untyped.datapipe.properties.DynamicFilter + + chunkSize = computeChunkSizeFromConfig(numericData, datasetConfig); + maxSize = size(numericData); + + dataPipeArgs = {... + "data", numericData, ... + "maxSize", maxSize, ... + "chunkSize", chunkSize }; + + hasShuffle = contains(datasetConfig.compression.prefilters, 'shuffle'); + + if strcmpi(datasetConfig.compression.algorithm, "Deflate") + % Use standard compression filters + dataPipeArgs = [ dataPipeArgs, ... + {'hasShuffle', hasShuffle, ... + 'compressionLevel', datasetConfig.compression.level} ... + ]; + else + % Create property list of custom filters for dataset creation + compressionFilter = DynamicFilter( ... + datasetConfig.compression.algorithm, ... + datasetConfig.compression.level ); + + if hasShuffle + shuffleFilter = types.untyped.datapipe.properties.Shuffle(); + filters = [shuffleFilter compressionFilter]; + else + filters = compressionFilter; + end + dataPipeArgs = [ dataPipeArgs, ... + {'filters', filters} ]; + end + + % Create the datapipe. 
+ dataPipe = types.untyped.DataPipe( dataPipeArgs{:} ); +end \ No newline at end of file diff --git a/+io/+config/+internal/reconfigureDataPipe.m b/+io/+config/+internal/reconfigureDataPipe.m new file mode 100644 index 00000000..183c0994 --- /dev/null +++ b/+io/+config/+internal/reconfigureDataPipe.m @@ -0,0 +1,4 @@ +function dataPipe = reconfigureDataPipe(dataPipe, datasetConfig) + % todo +end + diff --git a/+io/+config/+internal/resolveDataTypeChunkConfig.m b/+io/+config/+internal/resolveDataTypeChunkConfig.m index f5d8df2e..f701450b 100644 --- a/+io/+config/+internal/resolveDataTypeChunkConfig.m +++ b/+io/+config/+internal/resolveDataTypeChunkConfig.m @@ -1,4 +1,4 @@ -function resolvedOptions = resolveDataTypeChunkConfig(chunkSpecification, nwbObject) +function resolvedOptions = resolveDataTypeChunkConfig(chunkSpecification, nwbObject, datasetName) % resolveDataTypeChunkConfig - Resolve the chunk options for individual datatypes % This function resolves the chunk configuration options for a given NWB object % by traversing the object hierarchy and combining options from the most specific @@ -14,10 +14,11 @@ arguments chunkSpecification (1,1) struct nwbObject (1,1) types.untyped.MetaClass + datasetName (1,1) string end - % Initialize resolvedOptions with an empty struct - resolvedOptions = struct(); + % Initialize resolvedOptions with default options. + resolvedOptions = chunkSpecification.Default; % Get the NWB object type hierarchy (from most specific to base type) typeHierarchy = getTypeHierarchy(nwbObject); @@ -26,12 +27,16 @@ for i = numel(typeHierarchy):-1:1 typeName = typeHierarchy{i}; - % Check if the type has a chunkSpecification + % Check if the neurodata type has a chunkSpecification if isfield(chunkSpecification, typeName) typeOptions = chunkSpecification.(typeName); - % Merge options into resolvedOptions - resolvedOptions = mergeStructs(resolvedOptions, typeOptions); + % Is datasetName part of typeOptions? + if isfield(typeOptions, datasetName) + % Merge options into resolvedOptions + datasetOptions = typeOptions.(datasetName); + resolvedOptions = mergeStructs(resolvedOptions, datasetOptions); + end end end end diff --git a/+io/+config/applyChunkConfiguration.m b/+io/+config/applyChunkConfiguration.m index e0d0856e..568620fc 100644 --- a/+io/+config/applyChunkConfiguration.m +++ b/+io/+config/applyChunkConfiguration.m @@ -1,49 +1,90 @@ -function applyChunkConfiguration(nwbObject, chunkConfiguration) +function applyChunkConfiguration(nwbObject, chunkConfiguration, options) +% applyChunkConfiguration - Apply chunk configuration to datasets of an NWB object + arguments - nwbObject (1,1) NwbFile - chunkConfiguration (1,1) struct = io.config.readDefaultChunkConfiguration() + nwbObject (1,1) types.untyped.MetaClass + chunkConfiguration (1,1) struct = io.config.readDefaultChunkConfiguration() % Todo: class for this...? 
+ options.OverrideExisting (1,1) logical = false end + + import io.config.internal.resolveDataTypeChunkConfig - objectMap = nwbObject.searchFor(''); - objectKeys = objectMap.keys(); - - filteredObjectMap = containers.Map(); - for i = 1:numel(objectKeys) - thisObjectKey = objectKeys{i}; - thisNwbObject = objectMap(thisObjectKey); - if startsWith(class(thisNwbObject), "types.") && ~startsWith(class(thisNwbObject), "types.untyped") - filteredObjectMap(thisObjectKey) = thisNwbObject; - end + if isa(nwbObject, 'NwbFile') + neurodataObjects = getNeurodataObjectsFromNwbFile(nwbObject); + else + neurodataObjects = {nwbObject}; end - clear objectMap + + for iNeurodataObject = 1:numel(neurodataObjects) + thisNeurodataObject = neurodataObjects{iNeurodataObject}; + thisNeurodataClassName = class(thisNeurodataObject); + + % Need to keep track of this. A dataset can be defined across + % multiple levels of the class hierarchy, the lowest class should + % take precedence + processedDatasets = string.empty; + + isFinished = false; + while ~isFinished % Iterate over type and it's ancestor types (superclasses) + + datasetNames = schemes.listDatasetsOfNeurodataType( thisNeurodataClassName ); + + for thisDatasetName = datasetNames % Iterate over all datasets of a type... + + if ismember(thisDatasetName, processedDatasets) + continue + end + + datasetConfig = resolveDataTypeChunkConfig(... + chunkConfiguration, ... + thisNeurodataObject, ... + thisDatasetName); - objectKeys = filteredObjectMap.keys(); - for i = 1:numel(objectKeys) - thisObjectKey = objectKeys{i}; - thisNwbObject = filteredObjectMap(thisObjectKey); - - % Todo: Find dataset properties where it makes sense to do chunking - % I.e data, timestamps etc. Can this be determined automatically, - % or do we need a lookup? - - dataTypeChunkOptions = io.config.internal.resolveDataTypeChunkConfig(chunkConfiguration, thisNwbObject); - - if isprop(thisNwbObject, 'data') - if isnumeric(thisNwbObject.data) - % Create a datapipe object for the property value. - dataByteSize = io.config.internal.getDataByteSize(thisNwbObject.data); - if dataByteSize > dataTypeChunkOptions.chunk_default_size - chunkSize = io.config.internal.computeChunkSizeFromConfig(thisNwbObject.data, dataTypeChunkOptions); - maxSize = size(thisNwbObject.data); - - dataPipe = types.untyped.DataPipe( ... - 'data', thisNwbObject.data, ... - 'maxSize', maxSize, ... - 'chunkSize', chunkSize, ... - 'compressionLevel', dataTypeChunkOptions.chunk_compression_args); - thisNwbObject.data = dataPipe; + datasetData = thisNeurodataObject.(thisDatasetName); + + if isnumeric(datasetData) + % Create a datapipe object for a numeric dataset value. 
+ dataByteSize = io.config.internal.getDataByteSize(datasetData); + if dataByteSize > datasetConfig.target_chunk_size.value + dataPipe = io.config.internal.configureDataPipeFromData(datasetData, datasetConfig); + end + elseif isa(datasetData, 'types.untyped.DataPipe') + if options.OverrideExisting + dataPipe = io.config.internal.reconfigureDataPipe(datasetData, datasetConfig); + end + elseif isa(datasetData, 'types.untyped.DataStub') + % pass + %error('Not implemented for files obtained by nwbRead') + else + disp( class(datasetData) ) end + + if exist('dataPipe', 'var') + thisNeurodataObject.(thisDatasetName) = dataPipe; + processedDatasets = [processedDatasets, thisDatasetName]; %#ok + clear dataPipe + end + end + + parentType = matnwb.common.getParentType(thisNeurodataClassName); + + if isempty(parentType) + isFinished = true; + else + thisNeurodataClassName = parentType; end end end end + +function neurodataObjects = getNeurodataObjectsFromNwbFile(nwbObject) +% getNeurodataObjectsFromNwbObject - Return all neurodata objects in a NwbFile object + + objectMap = nwbObject.searchFor('types.'); + + neurodataObjects = objectMap.values(); + neurodataClassNames = cellfun(@(c) class(c), neurodataObjects, 'uni', 0); + + toIgnore = startsWith(neurodataClassNames, "types.untyped"); + neurodataObjects(toIgnore) = []; +end diff --git a/+io/+config/readDefaultChunkConfiguration.m b/+io/+config/readDefaultChunkConfiguration.m index 1c08c375..6ee3a720 100644 --- a/+io/+config/readDefaultChunkConfiguration.m +++ b/+io/+config/readDefaultChunkConfiguration.m @@ -1,19 +1,24 @@ function configObject = readDefaultChunkConfiguration() % READDEFAULTCHUNKCONFIGURATION Reads the default chunking configuration from a JSON file. % -% configObject = READDEFAULTCHUNKCONFIGURATION() loads the default chunking -% parameters from a JSON configuration file located in the 'configuration' -% directory within the MatNWB directory. +% Syntax: +% configObject = io.config.READDEFAULTCHUNKCONFIGURATION() loads the default +% chunking parameters from a JSON configuration file located in the +% "configuration" folder inside the MatNWB directory. % -% Output: -% configObject - A MATLAB structure containing the chunking parameters +% Output Arguments: +% - configObject - A MATLAB structure containing the chunking parameters % defined in the JSON configuration file. % -% Example: -% % Load the default chunk configuration -% config = readDefaultChunkConfiguration(); -% disp(config); +% Example 1 - Load default dataset configurations:: +% % Load the default chunk configuration +% config = readDefaultChunkConfiguration(); +% disp(config); + + configFilePath = fullfile(... + misc.getMatnwbDir, ... + 'configuration', ... 
+ 'cloud_dataset_configuration.json'); - configFilePath = fullfile(misc.getMatnwbDir, 'configuration', 'chunk_params.json'); configObject = jsondecode(fileread(configFilePath)); end diff --git a/+matnwb/+common/getParentType.m b/+matnwb/+common/getParentType.m new file mode 100644 index 00000000..816d30ea --- /dev/null +++ b/+matnwb/+common/getParentType.m @@ -0,0 +1,7 @@ +function parentTypeClassName = getParentType(typeClassName) + mc = meta.class.fromName(typeClassName); + parentTypeClassName = mc.SuperclassList(1).Name; + if strcmp(parentTypeClassName, "types.untyped.MetaClass") + parentTypeClassName = string.empty; + end +end \ No newline at end of file diff --git a/+schemes/listDatasetsOfNeurodataType.m b/+schemes/listDatasetsOfNeurodataType.m index 8a6ee902..1d5c6654 100644 --- a/+schemes/listDatasetsOfNeurodataType.m +++ b/+schemes/listDatasetsOfNeurodataType.m @@ -18,15 +18,33 @@ assert(~isempty(typesIdx), 'Expected class name to contain "types"') namespaceName = classNameSplit(typesIdx+1); + namespaceName = strrep(namespaceName, '_', '-'); namespace = schemes.loadNamespace(namespaceName, misc.getMatnwbDir); neurodataTypeName = classNameSplit(typesIdx+2); typeScheme = namespace.registry(neurodataTypeName); - datasetMaps = typeScheme('datasets'); + switch typeScheme('class_type') + case 'groups' + if isKey(typeScheme, 'datasets') + datasetMaps = typeScheme('datasets'); + + datasetNames = repmat("", size(datasetMaps)); + for i = 1:numel(datasetMaps) + if isKey(datasetMaps{i}, 'name') + datasetNames(i) = datasetMaps{i}('name'); + else + keyboard + end + end + datasetNames(datasetNames=="") = []; + else + datasetNames = string.empty; + end - datasetNames = repmat("", size(datasetMaps)); - for i = 1:numel(datasetMaps) - datasetNames(i) = datasetMaps{i}('name'); + case 'datasets' + datasetNames = "data"; + otherwise + error('Unexpected class type') end end From b32c3c42ac67d7b52b08e6bebccd8bf97a05f57a Mon Sep 17 00:00:00 2001 From: ehennestad Date: Tue, 21 Jan 2025 22:03:33 +0100 Subject: [PATCH 07/19] Remove unused condition in applyChunkConfiguration --- +io/+config/applyChunkConfiguration.m | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/+io/+config/applyChunkConfiguration.m b/+io/+config/applyChunkConfiguration.m index 568620fc..9a467a6c 100644 --- a/+io/+config/applyChunkConfiguration.m +++ b/+io/+config/applyChunkConfiguration.m @@ -2,34 +2,28 @@ function applyChunkConfiguration(nwbObject, chunkConfiguration, options) % applyChunkConfiguration - Apply chunk configuration to datasets of an NWB object arguments - nwbObject (1,1) types.untyped.MetaClass + nwbObject (1,1) NwbFile chunkConfiguration (1,1) struct = io.config.readDefaultChunkConfiguration() % Todo: class for this...? options.OverrideExisting (1,1) logical = false end import io.config.internal.resolveDataTypeChunkConfig - if isa(nwbObject, 'NwbFile') - neurodataObjects = getNeurodataObjectsFromNwbFile(nwbObject); - else - neurodataObjects = {nwbObject}; - end + neurodataObjects = getNeurodataObjectsFromNwbFile(nwbObject); for iNeurodataObject = 1:numel(neurodataObjects) thisNeurodataObject = neurodataObjects{iNeurodataObject}; thisNeurodataClassName = class(thisNeurodataObject); - % Need to keep track of this. A dataset can be defined across - % multiple levels of the class hierarchy, the lowest class should - % take precedence + % A dataset can be defined on multiple levels of the class hierarchy, + % so need to keep track of which datasets have been processed. 
processedDatasets = string.empty; isFinished = false; while ~isFinished % Iterate over type and it's ancestor types (superclasses) datasetNames = schemes.listDatasetsOfNeurodataType( thisNeurodataClassName ); - - for thisDatasetName = datasetNames % Iterate over all datasets of a type... + for thisDatasetName = datasetNames % Iterate over all datasets of a type if ismember(thisDatasetName, processedDatasets) continue @@ -53,8 +47,8 @@ function applyChunkConfiguration(nwbObject, chunkConfiguration, options) dataPipe = io.config.internal.reconfigureDataPipe(datasetData, datasetConfig); end elseif isa(datasetData, 'types.untyped.DataStub') - % pass - %error('Not implemented for files obtained by nwbRead') + % todo + % error('Not implemented for files obtained by nwbRead') else disp( class(datasetData) ) end From 37e68e1e8705e9bd94a480f03aaa2fa648cc077b Mon Sep 17 00:00:00 2001 From: ehennestad Date: Tue, 21 Jan 2025 22:03:39 +0100 Subject: [PATCH 08/19] Update getParentType.m --- +matnwb/+common/getParentType.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/+matnwb/+common/getParentType.m b/+matnwb/+common/getParentType.m index 816d30ea..ba087ab5 100644 --- a/+matnwb/+common/getParentType.m +++ b/+matnwb/+common/getParentType.m @@ -4,4 +4,4 @@ if strcmp(parentTypeClassName, "types.untyped.MetaClass") parentTypeClassName = string.empty; end -end \ No newline at end of file +end From 3d5d2ac94ce816df31b8ab1e8420ac52ae41ecda Mon Sep 17 00:00:00 2001 From: ehennestad Date: Wed, 22 Jan 2025 12:00:06 +0100 Subject: [PATCH 09/19] Add different dataset configuration profiles --- .../archive_dataset_configuration.json | 21 ++++++++ configuration/chunk_params.json | 49 ------------------- .../default_dataset_configuration.json | 21 ++++++++ 3 files changed, 42 insertions(+), 49 deletions(-) create mode 100644 configuration/archive_dataset_configuration.json delete mode 100644 configuration/chunk_params.json create mode 100644 configuration/default_dataset_configuration.json diff --git a/configuration/archive_dataset_configuration.json b/configuration/archive_dataset_configuration.json new file mode 100644 index 00000000..94b7bd23 --- /dev/null +++ b/configuration/archive_dataset_configuration.json @@ -0,0 +1,21 @@ +{ + "Default": { + "layout": "chunked", + "target_chunk_size": { + "value": 100000000, + "unit": "bytes" + }, + "chunk_dimensions": [ + [null], + [null, "max"], + [null, "max", "max"], + [null, "max", "max", "max"] + ], + "compression": { + "algorithm": "ZStandard", + "level": 9, + "parameters": {}, + "prefilters": ["shuffle"] + } + } +} \ No newline at end of file diff --git a/configuration/chunk_params.json b/configuration/chunk_params.json deleted file mode 100644 index 2ce804d9..00000000 --- a/configuration/chunk_params.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "NWBContainer": { - "chunk_compression": "gzip", - "chunk_compression_args": 4, - "chunk_default_size": 10000000.0, - "chunk_default_size_unit": "bytes", - "data": { - "chunk_dimensions": [ - null - ] - } - }, - "Data": { - "chunk_compression": "gzip", - "chunk_compression_args": 4, - "chunk_default_size": 10000000.0, - "chunk_default_size_unit": "bytes", - "data": { - "chunk_dimensions": [ - null - ] - } - }, - "ElectricalSeries": { - "data": { - "chunk_dimensions": [ - null, - 32 - ] - } - }, - "TimeSeries": { - "data": { - "chunk_dimensions": [ - null, - 32 - ] - } - }, - "ImageSeries": { - "data": { - "chunk_dimensions": [ - null, - "max", - "max" - ] - } - } -} diff --git 
a/configuration/default_dataset_configuration.json b/configuration/default_dataset_configuration.json new file mode 100644 index 00000000..8443ffd7 --- /dev/null +++ b/configuration/default_dataset_configuration.json @@ -0,0 +1,21 @@ +{ + "Default": { + "layout": "chunked", + "target_chunk_size": { + "value": null, + "unit": "bytes" + }, + "chunk_dimensions": [ + [null], + [null, "max"], + [null, "max", "max"], + [null, "max", "max", "max"] + ], + "compression": { + "algorithm": "deflate", + "level": 3, + "parameters": {}, + "prefilters": [] + } + } +} \ No newline at end of file From 9e623a24e58b94711910838d8a40ec90eaa287ee Mon Sep 17 00:00:00 2001 From: ehennestad Date: Wed, 22 Jan 2025 12:05:30 +0100 Subject: [PATCH 10/19] Consistently name functions and code using datasetConfiguration instead of chunkConfiguration --- .../+internal/computeChunkSizeFromConfig.m | 26 +-- .../+internal/configureDataPipeFromData.m | 4 +- +io/+config/+internal/reconfigureDataPipe.m | 1 - ...ig.m => resolveDatasetConfigForDataType.m} | 24 +-- ...guration.m => applyDatasetConfiguration.m} | 14 +- +io/+config/readDatasetConfiguration.m | 44 +++++ +io/+config/readDefaultChunkConfiguration.m | 24 --- .../+io/+config/DatasetConfigurationTest.m | 166 ++++++++++++++++++ 8 files changed, 247 insertions(+), 56 deletions(-) rename +io/+config/+internal/{resolveDataTypeChunkConfig.m => resolveDatasetConfigForDataType.m} (76%) rename +io/+config/{applyChunkConfiguration.m => applyDatasetConfiguration.m} (87%) create mode 100644 +io/+config/readDatasetConfiguration.m delete mode 100644 +io/+config/readDefaultChunkConfiguration.m create mode 100644 +tests/+unit/+io/+config/DatasetConfigurationTest.m diff --git a/+io/+config/+internal/computeChunkSizeFromConfig.m b/+io/+config/+internal/computeChunkSizeFromConfig.m index f25158dc..2fb76843 100644 --- a/+io/+config/+internal/computeChunkSizeFromConfig.m +++ b/+io/+config/+internal/computeChunkSizeFromConfig.m @@ -1,35 +1,41 @@ -function chunkSize = computeChunkSizeFromConfig(A, chunkSpecification) -% computeChunkSizeFromConfig - Compute the chunk size for a dataset using the provided specification. +function chunkSize = computeChunkSizeFromConfig(A, datasetConfig) +% computeChunkSizeFromConfig - Compute the chunk size for a dataset using the provided configuration. % This function determines the chunk size for a dataset based on the chunk -% dimensions provided in the chunkSpecification. It adjusts dimensions according -% to rules: 'max' uses the dataset size, fixed numbers use their value, and 'null' -% calculates the dimension size to approximate the target chunk size in bytes. +% dimensions provided in the datasetConfig structure. It adjusts dimensions +% according to rules: 'max' uses the dataset size, fixed numbers use their +% value, and 'null' calculates the dimension size to approximate the target +% chunk size in bytes. % % Inputs: % A - A numeric dataset whose chunk size is to be computed. -% chunkSpecification (1,1) struct - Struct defining chunk dimensions and settings. +% datasetConfig (1,1) struct - Struct defining chunk dimensions and chunk target size. % % Output: % chunkSize - A vector specifying the chunk size for each dimension. arguments A {mustBeNumeric} - chunkSpecification (1,1) struct + datasetConfig (1,1) struct end + + assert(isfield(datasetConfig, 'chunk_dimensions')), ... + 'Expected datasetConfig to have field "chunk_dimensions"') + assert(isfield(datasetConfig, 'target_chunk_size'), ... 
+ 'Expected datasetConfig to have field "target_chunk_size"') % Get dataset size dataSize = size(A); dataSize = fliplr(dataSize); % matnwb quirk numDimensions = numel(dataSize); - + % Extract relevant configuration parameters - chunkDimensions = chunkSpecification.chunk_dimensions; + chunkDimensions = datasetConfig.chunk_dimensions; if iscell(chunkDimensions) numChunkDimensions = cellfun(@numel, chunkDimensions); chunkDimensions = chunkDimensions{numChunkDimensions == numDimensions}; end - defaultChunkSize = chunkSpecification.target_chunk_size.value; % in bytes + defaultChunkSize = datasetConfig.target_chunk_size.value; % in bytes dataByteSize = io.config.internal.getDataByteSize(A); % Initialize chunk size array diff --git a/+io/+config/+internal/configureDataPipeFromData.m b/+io/+config/+internal/configureDataPipeFromData.m index cbc7a46f..04f321ba 100644 --- a/+io/+config/+internal/configureDataPipeFromData.m +++ b/+io/+config/+internal/configureDataPipeFromData.m @@ -20,7 +20,7 @@ {'hasShuffle', hasShuffle, ... 'compressionLevel', datasetConfig.compression.level} ... ]; - else + else % Create property list of custom filters for dataset creation compressionFilter = DynamicFilter( ... datasetConfig.compression.algorithm, ... @@ -38,4 +38,4 @@ % Create the datapipe. dataPipe = types.untyped.DataPipe( dataPipeArgs{:} ); -end \ No newline at end of file +end diff --git a/+io/+config/+internal/reconfigureDataPipe.m b/+io/+config/+internal/reconfigureDataPipe.m index 183c0994..3c68a046 100644 --- a/+io/+config/+internal/reconfigureDataPipe.m +++ b/+io/+config/+internal/reconfigureDataPipe.m @@ -1,4 +1,3 @@ function dataPipe = reconfigureDataPipe(dataPipe, datasetConfig) % todo end - diff --git a/+io/+config/+internal/resolveDataTypeChunkConfig.m b/+io/+config/+internal/resolveDatasetConfigForDataType.m similarity index 76% rename from +io/+config/+internal/resolveDataTypeChunkConfig.m rename to +io/+config/+internal/resolveDatasetConfigForDataType.m index f701450b..402ba0a1 100644 --- a/+io/+config/+internal/resolveDataTypeChunkConfig.m +++ b/+io/+config/+internal/resolveDatasetConfigForDataType.m @@ -1,24 +1,24 @@ -function resolvedOptions = resolveDataTypeChunkConfig(chunkSpecification, nwbObject, datasetName) -% resolveDataTypeChunkConfig - Resolve the chunk options for individual datatypes -% This function resolves the chunk configuration options for a given NWB object +function resolvedOptions = resolveDatasetConfigForDataType(datasetConfig, nwbObject, datasetName) +% resolveDatasetConfigForDataType - Resolve the dataset configuration for individual neurodata types +% This function resolves the dataset configuration options for a given NWB object % by traversing the object hierarchy and combining options from the most specific -% type to the base type, as defined in the chunkSpecification. +% type to the base type, as defined in the datasetConfig structure. % % Input: -% chunkSpecification (struct): A struct representation of the chunk configuration JSON. -% nwbObject (types.untyped.MetaClass): An NWB object whose chunk configuration will be resolved. +% datasetConfig (struct): A struct representation of the dataset configuration JSON. +% nwbObject (types.untyped.MetaClass): An NWB object whose dataset configuration will be resolved. % % Output: -% resolvedOptions (struct): A struct containing the resolved chunk configuration options. +% resolvedOptions (struct): A struct containing the resolved dataset configuration options. 
arguments - chunkSpecification (1,1) struct + datasetConfig (1,1) struct nwbObject (1,1) types.untyped.MetaClass datasetName (1,1) string end % Initialize resolvedOptions with default options. - resolvedOptions = chunkSpecification.Default; + resolvedOptions = datasetConfig.Default; % Get the NWB object type hierarchy (from most specific to base type) typeHierarchy = getTypeHierarchy(nwbObject); @@ -27,9 +27,9 @@ for i = numel(typeHierarchy):-1:1 typeName = typeHierarchy{i}; - % Check if the neurodata type has a chunkSpecification - if isfield(chunkSpecification, typeName) - typeOptions = chunkSpecification.(typeName); + % Check if the neurodata type has a datasetConfig + if isfield(datasetConfig, typeName) + typeOptions = datasetConfig.(typeName); % Is datasetName part of typeOptions? if isfield(typeOptions, datasetName) diff --git a/+io/+config/applyChunkConfiguration.m b/+io/+config/applyDatasetConfiguration.m similarity index 87% rename from +io/+config/applyChunkConfiguration.m rename to +io/+config/applyDatasetConfiguration.m index 9a467a6c..cbba8193 100644 --- a/+io/+config/applyChunkConfiguration.m +++ b/+io/+config/applyDatasetConfiguration.m @@ -1,13 +1,13 @@ -function applyChunkConfiguration(nwbObject, chunkConfiguration, options) -% applyChunkConfiguration - Apply chunk configuration to datasets of an NWB object +function applyDatasetConfiguration(nwbObject, datasetConfiguration, options) +% applyDatasetConfiguration - Apply dataset configuration to datasets of an NWB object arguments nwbObject (1,1) NwbFile - chunkConfiguration (1,1) struct = io.config.readDefaultChunkConfiguration() % Todo: class for this...? + datasetConfiguration (1,1) struct = io.config.readDatasetConfiguration() options.OverrideExisting (1,1) logical = false end - import io.config.internal.resolveDataTypeChunkConfig + import io.config.internal.resolveDatasetConfigForDataType neurodataObjects = getNeurodataObjectsFromNwbFile(nwbObject); @@ -29,8 +29,8 @@ function applyChunkConfiguration(nwbObject, chunkConfiguration, options) continue end - datasetConfig = resolveDataTypeChunkConfig(... - chunkConfiguration, ... + datasetConfig = resolveDatasetConfigForDataType(... + datasetConfiguration, ... thisNeurodataObject, ... thisDatasetName); @@ -77,7 +77,7 @@ function applyChunkConfiguration(nwbObject, chunkConfiguration, options) objectMap = nwbObject.searchFor('types.'); neurodataObjects = objectMap.values(); - neurodataClassNames = cellfun(@(c) class(c), neurodataObjects, 'uni', 0); + neurodataClassNames = cellfun(@(c) class(c), neurodataObjects, 'uni', 0); toIgnore = startsWith(neurodataClassNames, "types.untyped"); neurodataObjects(toIgnore) = []; diff --git a/+io/+config/readDatasetConfiguration.m b/+io/+config/readDatasetConfiguration.m new file mode 100644 index 00000000..06d5ace1 --- /dev/null +++ b/+io/+config/readDatasetConfiguration.m @@ -0,0 +1,44 @@ +function datasetConfig = readDatasetConfiguration(profile) +% READDATASETCONFIGURATION Reads the default dataset configuration from a JSON file. +% +% Syntax: +% configObject = io.config.READDATASETCONFIGURATION() loads the default +% dataset configuration parameters from a JSON file located in the +% "configuration" folder in the MatNWB root directory. +% +% configObject = io.config.READDATASETCONFIGURATION(profile) loads the +% dataset configuration parameters for the specified "configuration profile" +% from a JSON file located in the "configuration" folder in the MatNWB root +% directory. 
+% +% Output Arguments: +% - datasetConfig - A MATLAB structure containing the dataset configuration +% parameters (chunking & compression) defined in the JSON +% configuration file. +% +% Example 1 - Load default dataset configurations:: +% +% % Load the default dataset configuration +% datasetConfig = io.config.readDatasetConfiguration(); +% disp(datasetConfig); + + arguments + profile (1,1) string {mustBeMember(profile, [ ... + "default", ... + "cloud", ... + "archive" + ])} = "default" + end + + switch profile + case "default" + filename = 'default_dataset_configuration.json'; + case "cloud" + filename = 'cloud_dataset_configuration.json'; + case "archive" + filename = 'archive_dataset_configuration.json'; + end + + configFilePath = fullfile(misc.getMatnwbDir, 'configuration', filename); + datasetConfig = jsondecode(fileread(configFilePath)); +end diff --git a/+io/+config/readDefaultChunkConfiguration.m b/+io/+config/readDefaultChunkConfiguration.m deleted file mode 100644 index 6ee3a720..00000000 --- a/+io/+config/readDefaultChunkConfiguration.m +++ /dev/null @@ -1,24 +0,0 @@ -function configObject = readDefaultChunkConfiguration() -% READDEFAULTCHUNKCONFIGURATION Reads the default chunking configuration from a JSON file. -% -% Syntax: -% configObject = io.config.READDEFAULTCHUNKCONFIGURATION() loads the default -% chunking parameters from a JSON configuration file located in the -% "configuration" folder inside the MatNWB directory. -% -% Output Arguments: -% - configObject - A MATLAB structure containing the chunking parameters -% defined in the JSON configuration file. -% -% Example 1 - Load default dataset configurations:: -% % Load the default chunk configuration -% config = readDefaultChunkConfiguration(); -% disp(config); - - configFilePath = fullfile(... - misc.getMatnwbDir, ... - 'configuration', ... - 'cloud_dataset_configuration.json'); - - configObject = jsondecode(fileread(configFilePath)); -end diff --git a/+tests/+unit/+io/+config/DatasetConfigurationTest.m b/+tests/+unit/+io/+config/DatasetConfigurationTest.m new file mode 100644 index 00000000..ed2057ba --- /dev/null +++ b/+tests/+unit/+io/+config/DatasetConfigurationTest.m @@ -0,0 +1,166 @@ +classdef DatasetConfigurationTest < matlab.unittest.TestCase +% Tests for io.config.applyDatasetConfiguration function + + properties + DefaultConfig + end + + methods(TestMethodSetup) + function setup(testCase) + % Setup default configuration before each test + testCase.DefaultConfig = io.config.readDatasetConfiguration(); + end + end + + methods(Test) + function testBasicFunctionality(testCase) + % Test basic functionality with default configuration + nwbFile = NwbFile( ... + 'identifier', 'TEST123', ... + 'session_description', 'test session', ... + 'session_start_time', datetime()); + + % Should not throw any errors + io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig); + end + + function testNumericDatasetConfiguration(testCase) + % Test configuration of numeric datasets + nwbFile = NwbFile( ... + 'identifier', 'TEST123', ... + 'session_description', 'test session', ... + 'session_start_time', datetime()); + + % Create a large numeric dataset + data = types.core.TimeSeries( ... + 'data', rand(1000, 1000), ... + 'data_unit', 'n/a', ... 
+ 'timestamps', 1:1000); + + nwbFile.acquisition.set('test_data', data); + + % Apply configuration + io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig); + + % Verify the dataset was converted to DataPipe + testCase.verifyTrue(isa(nwbFile.acquisition.get('test_data').data, ... + 'types.untyped.DataPipe'), ... + 'Large numeric dataset should be converted to DataPipe'); + end + + function testSmallNumericDataset(testCase) + % Test that small numeric datasets remain unchanged + nwbFile = NwbFile( ... + 'identifier', 'TEST123', ... + 'session_description', 'test session', ... + 'session_start_time', datetime()); + + % Create a small numeric dataset + data = types.core.TimeSeries( ... + 'data', rand(10, 10), ... + 'data_unit', 'n/a', ... + 'timestamps', 1:10); + + nwbFile.acquisition.set('test_data', data); + + % Apply configuration + io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig); + + % Verify the dataset remains numeric + testCase.verifyTrue(isnumeric(nwbFile.acquisition.get('test_data').data), ... + 'Small numeric dataset should remain numeric'); + end + + function testOverrideExisting(testCase) + % Test override behavior for existing DataPipe objects + nwbFile = NwbFile( ... + 'identifier', 'TEST123', ... + 'session_description', 'test session', ... + 'session_start_time', datetime()); + + % Create a DataPipe object + rawData = rand(1000, 1000); + dataPipe = types.untyped.DataPipe('data', rawData, 'axis', 1, 'chunk_size', 100); + + data = types.core.TimeSeries( ... + 'data', dataPipe, ... + 'data_unit', 'n/a', ... + 'timestamps', 1:1000); + + nwbFile.acquisition.set('test_data', data); + + % Apply configuration with override + io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig, ... + 'OverrideExisting', true); + + % Verify the DataPipe was reconfigured + resultPipe = nwbFile.acquisition.get('test_data').data; + testCase.verifyTrue(isa(resultPipe, 'types.untyped.DataPipe'), ... + 'Result should still be a DataPipe'); + end + + function testNoOverrideExisting(testCase) + % Test that existing DataPipe objects are not modified without override + nwbFile = NwbFile( ... + 'identifier', 'TEST123', ... + 'session_description', 'test session', ... + 'session_start_time', datetime()); + + % Create a DataPipe object with specific configuration + rawData = rand(1000, 1000); + originalChunkSize = 100; + dataPipe = types.untyped.DataPipe('data', rawData, 'axis', 1, ... + 'chunk_size', originalChunkSize); + + data = types.core.TimeSeries( ... + 'data', dataPipe, ... + 'data_unit', 'n/a', ... + 'timestamps', 1:1000); + + nwbFile.acquisition.set('test_data', data); + + % Apply configuration without override + io.config.applyDatasetConfiguration(nwbFile, testCase.DefaultConfig, ... + 'OverrideExisting', false); + + % Verify the DataPipe configuration remains unchanged + resultPipe = nwbFile.acquisition.get('test_data').data; + testCase.verifyEqual(resultPipe.chunk_size, originalChunkSize, ... + 'DataPipe configuration should remain unchanged without override'); + end + + function testGetNeurodataObjects(testCase) + % Test the nested getNeurodataObjectsFromNwbFile function + nwbFile = NwbFile( ... + 'identifier', 'TEST123', ... + 'session_description', 'test session', ... + 'session_start_time', datetime()); + + % Add various types of objects + timeseries = types.core.TimeSeries( ... + 'data', rand(10, 10), ... + 'data_unit', 'n/a', ... 
+ 'timestamps', 1:10); + + nwbFile.acquisition.set('test_timeseries', timeseries); + + % Add an untyped object that should be ignored + untypedObj = types.untyped.Group(); + nwbFile.acquisition.set('untyped_obj', untypedObj); + + % Get private access to the nested function + metaClass = metaclass(nwbFile); + methodList = metaClass.MethodList; + getNeurodataObjectsFcn = str2func('io.config.applyDatasetConfiguration>getNeurodataObjectsFromNwbFile'); + + % Call the function + neurodataObjects = getNeurodataObjectsFcn(nwbFile); + + % Verify results + testCase.verifySize(neurodataObjects, [1 1], ... + 'Should find one neurodata object'); + testCase.verifyTrue(isa(neurodataObjects{1}, 'types.core.TimeSeries'), ... + 'Should find TimeSeries object'); + end + end +end From 665bf5c60df446a3cac3a3077cf2a2bdd0c02de5 Mon Sep 17 00:00:00 2001 From: ehennestad Date: Wed, 22 Jan 2025 12:17:19 +0100 Subject: [PATCH 11/19] Test-related fixes --- .../+internal/computeChunkSizeFromConfig.m | 2 +- .../+internal/configureDataPipeFromData.m | 3 +- .../+io/+config/DatasetConfigurationTest.m | 42 ++----------------- .../default_dataset_configuration.json | 2 +- 4 files changed, 8 insertions(+), 41 deletions(-) diff --git a/+io/+config/+internal/computeChunkSizeFromConfig.m b/+io/+config/+internal/computeChunkSizeFromConfig.m index 2fb76843..4a6de83c 100644 --- a/+io/+config/+internal/computeChunkSizeFromConfig.m +++ b/+io/+config/+internal/computeChunkSizeFromConfig.m @@ -18,7 +18,7 @@ datasetConfig (1,1) struct end - assert(isfield(datasetConfig, 'chunk_dimensions')), ... + assert(isfield(datasetConfig, 'chunk_dimensions'), ... 'Expected datasetConfig to have field "chunk_dimensions"') assert(isfield(datasetConfig, 'target_chunk_size'), ... 'Expected datasetConfig to have field "target_chunk_size"') diff --git a/+io/+config/+internal/configureDataPipeFromData.m b/+io/+config/+internal/configureDataPipeFromData.m index 04f321ba..d4b8eff9 100644 --- a/+io/+config/+internal/configureDataPipeFromData.m +++ b/+io/+config/+internal/configureDataPipeFromData.m @@ -12,7 +12,8 @@ "maxSize", maxSize, ... "chunkSize", chunkSize }; - hasShuffle = contains(datasetConfig.compression.prefilters, 'shuffle'); + hasShuffle = ~isempty(datasetConfig.compression.prefilters)... + && contains(datasetConfig.compression.prefilters, 'shuffle'); if strcmpi(datasetConfig.compression.algorithm, "Deflate") % Use standard compression filters diff --git a/+tests/+unit/+io/+config/DatasetConfigurationTest.m b/+tests/+unit/+io/+config/DatasetConfigurationTest.m index ed2057ba..c285a36a 100644 --- a/+tests/+unit/+io/+config/DatasetConfigurationTest.m +++ b/+tests/+unit/+io/+config/DatasetConfigurationTest.m @@ -80,7 +80,7 @@ function testOverrideExisting(testCase) % Create a DataPipe object rawData = rand(1000, 1000); - dataPipe = types.untyped.DataPipe('data', rawData, 'axis', 1, 'chunk_size', 100); + dataPipe = types.untyped.DataPipe('data', rawData, 'axis', 1, 'chunkSize', 100); data = types.core.TimeSeries( ... 'data', dataPipe, ... @@ -108,9 +108,9 @@ function testNoOverrideExisting(testCase) % Create a DataPipe object with specific configuration rawData = rand(1000, 1000); - originalChunkSize = 100; + originalChunkSize = [100, 100]; dataPipe = types.untyped.DataPipe('data', rawData, 'axis', 1, ... - 'chunk_size', originalChunkSize); + 'chunkSize', originalChunkSize); data = types.core.TimeSeries( ... 'data', dataPipe, ... 
@@ -125,42 +125,8 @@ function testNoOverrideExisting(testCase) % Verify the DataPipe configuration remains unchanged resultPipe = nwbFile.acquisition.get('test_data').data; - testCase.verifyEqual(resultPipe.chunk_size, originalChunkSize, ... + testCase.verifyEqual(resultPipe.chunkSize, originalChunkSize, ... 'DataPipe configuration should remain unchanged without override'); end - - function testGetNeurodataObjects(testCase) - % Test the nested getNeurodataObjectsFromNwbFile function - nwbFile = NwbFile( ... - 'identifier', 'TEST123', ... - 'session_description', 'test session', ... - 'session_start_time', datetime()); - - % Add various types of objects - timeseries = types.core.TimeSeries( ... - 'data', rand(10, 10), ... - 'data_unit', 'n/a', ... - 'timestamps', 1:10); - - nwbFile.acquisition.set('test_timeseries', timeseries); - - % Add an untyped object that should be ignored - untypedObj = types.untyped.Group(); - nwbFile.acquisition.set('untyped_obj', untypedObj); - - % Get private access to the nested function - metaClass = metaclass(nwbFile); - methodList = metaClass.MethodList; - getNeurodataObjectsFcn = str2func('io.config.applyDatasetConfiguration>getNeurodataObjectsFromNwbFile'); - - % Call the function - neurodataObjects = getNeurodataObjectsFcn(nwbFile); - - % Verify results - testCase.verifySize(neurodataObjects, [1 1], ... - 'Should find one neurodata object'); - testCase.verifyTrue(isa(neurodataObjects{1}, 'types.core.TimeSeries'), ... - 'Should find TimeSeries object'); - end end end diff --git a/configuration/default_dataset_configuration.json b/configuration/default_dataset_configuration.json index 8443ffd7..df12d3e7 100644 --- a/configuration/default_dataset_configuration.json +++ b/configuration/default_dataset_configuration.json @@ -2,7 +2,7 @@ "Default": { "layout": "chunked", "target_chunk_size": { - "value": null, + "value": 1000000, "unit": "bytes" }, "chunk_dimensions": [ From 32771ea938188f5c9ac110ea6b6ee9ff5c66c16b Mon Sep 17 00:00:00 2001 From: ehennestad Date: Wed, 22 Jan 2025 12:35:30 +0100 Subject: [PATCH 12/19] simplify readDatasetConfiguration Replaces switch block with formatted string --- +io/+config/readDatasetConfiguration.m | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/+io/+config/readDatasetConfiguration.m b/+io/+config/readDatasetConfiguration.m index 06d5ace1..4a05e8b6 100644 --- a/+io/+config/readDatasetConfiguration.m +++ b/+io/+config/readDatasetConfiguration.m @@ -30,14 +30,7 @@ ])} = "default" end - switch profile - case "default" - filename = 'default_dataset_configuration.json'; - case "cloud" - filename = 'cloud_dataset_configuration.json'; - case "archive" - filename = 'archive_dataset_configuration.json'; - end + filename = sprintf('%s_dataset_configuration.json', profile); configFilePath = fullfile(misc.getMatnwbDir, 'configuration', filename); datasetConfig = jsondecode(fileread(configFilePath)); From 8bde77596f96e89c56cef4a7a9143222571fc8e9 Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 22 Feb 2025 11:43:19 +0100 Subject: [PATCH 13/19] Create applyCustomMatNWBPropertyNames.m Function that will ensure the dataset configuration conforms with MatNWB specific implementation details --- .../applyCustomMatNWBPropertyNames.m | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 +io/+config/+internal/applyCustomMatNWBPropertyNames.m diff --git a/+io/+config/+internal/applyCustomMatNWBPropertyNames.m b/+io/+config/+internal/applyCustomMatNWBPropertyNames.m new file mode 100644 index 
00000000..d56ece38 --- /dev/null +++ b/+io/+config/+internal/applyCustomMatNWBPropertyNames.m @@ -0,0 +1,73 @@ +function datasetConfiguration = applyCustomMatNWBPropertyNames(datasetConfiguration) + + arguments + datasetConfiguration (1,1) struct + end + + fields = fieldnames(datasetConfiguration); + classNameMap = getNwbTypesClassnameMap(); + + for i = 1:numel(fields) + + thisField = fields{i}; + if ~isKey(classNameMap, thisField) + continue + end + + fullClassName = classNameMap(thisField); + superclassNames = superclasses(fullClassName); + + if any(strcmp(superclassNames, "types.untyped.MetaClass")) + thisSubConfig = datasetConfiguration.(thisField); + if any(strcmp(superclassNames, "types.untyped.GroupClass")) + % Recursively process subgroups + datasetConfiguration.(thisField) = ... + io.config.internal.applyCustomMatNWBPropertyNames(thisSubConfig); + elseif any(strcmp(superclassNames, "types.untyped.DatasetClass")) + % MatNWB adds a "data" property on Dataset type classes, + % which is not originally part of the schema. + datasetConfiguration.(thisField) = struct('data', thisSubConfig); + else + error('NWB:UnexpectedError', 'Something unexpected happened.') + end + else + % Do nothing. + end + end +end + +function ancestorPath = getAncestorPath(initialPath, numSteps) + arguments + initialPath (1,1) string + numSteps (1,1) double + end + splitPath = split(initialPath, filesep); + + ancestorPath = fullfile(splitPath{1:end-numSteps}); % char output + if isunix && ~startsWith(ancestorPath, filesep) + ancestorPath = [filesep ancestorPath]; + end +end + +function map = getNwbTypesClassnameMap() + + typesClassDirectory = getAncestorPath( which('types.core.NWBFile'), 2 ); + + % Find names of all nwb types: + L = dir(fullfile(typesClassDirectory, '**', '*.m')); + ignore = contains({L.folder}, fullfile('+types', '+untyped')) | ... 
+ contains({L.folder}, fullfile('+types', '+util')); + L(ignore) = []; + + + [~, namespaceNames] = fileparts({L.folder}); + namespaceNames = string( strrep(namespaceNames, '+', '') ); + classNames = string( strrep( {L.name}, '.m', '') ); + + fullClassNames = compose("types.%s.%s", namespaceNames', classNames'); + try + map = dictionary(classNames', fullClassNames); + catch % If older version of MATLAB + map = containers.Map(classNames, fullClassNames); + end +end \ No newline at end of file From 12f0453f23747f892128269cd9234ebab9ae57a7 Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 22 Feb 2025 14:06:56 +0100 Subject: [PATCH 14/19] Update configuration/archive_dataset_configuration.json Co-authored-by: Ben Dichter --- configuration/archive_dataset_configuration.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configuration/archive_dataset_configuration.json b/configuration/archive_dataset_configuration.json index 94b7bd23..0b26f018 100644 --- a/configuration/archive_dataset_configuration.json +++ b/configuration/archive_dataset_configuration.json @@ -13,7 +13,7 @@ ], "compression": { "algorithm": "ZStandard", - "level": 9, + "level": 5, "parameters": {}, "prefilters": ["shuffle"] } From 7bfc2c8c8c26404eb4510d867be8aed56ee63047 Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 22 Feb 2025 14:20:15 +0100 Subject: [PATCH 15/19] Add docstring for function applyCustomMatNWBPropertyNames.m --- .../applyCustomMatNWBPropertyNames.m | 81 ++++++++++++++++--- +matnwb/+common/composeFullClassName.m | 9 +++ 2 files changed, 78 insertions(+), 12 deletions(-) create mode 100644 +matnwb/+common/composeFullClassName.m diff --git a/+io/+config/+internal/applyCustomMatNWBPropertyNames.m b/+io/+config/+internal/applyCustomMatNWBPropertyNames.m index d56ece38..4fa82c3b 100644 --- a/+io/+config/+internal/applyCustomMatNWBPropertyNames.m +++ b/+io/+config/+internal/applyCustomMatNWBPropertyNames.m @@ -1,17 +1,42 @@ function datasetConfiguration = applyCustomMatNWBPropertyNames(datasetConfiguration) - +% applyCustomMatNWBPropertyNames - Processes a dataset configuration structure to apply custom MatNWB property names. +% +% datasetConfiguration = applyCustomMatNWBPropertyNames(datasetConfiguration) +% +% This function iterates through each field of the input structure and checks +% if the field corresponds to a known NWB type (using a mapping from short +% names to fully qualified class names). For each recognized field: +% +% - It retrieves the full class name and determines its superclasses. +% - If the class is a subclass of "types.untyped.MetaClass": +% * If it is also a "types.untyped.GroupClass", the function recursively +% processes the subgroup configuration. +% * If it is a "types.untyped.DatasetClass", it wraps the existing +% configuration in a structure with a "data" property. +% - If the field is not associated with a recognized NWB type, it remains +% unchanged. +% +% Input: +% datasetConfiguration - A 1x1 struct containing dataset configuration +% data. +% +% Output: +% datasetConfiguration - The updated configuration structure with custom +% property names. 
+ arguments datasetConfiguration (1,1) struct end fields = fieldnames(datasetConfiguration); + classNameMap = getNwbTypesClassnameMap(); for i = 1:numel(fields) thisField = fields{i}; if ~isKey(classNameMap, thisField) - continue + continue % Not a neurodata / nwb type end fullClassName = classNameMap(thisField); @@ -20,23 +45,34 @@ if any(strcmp(superclassNames, "types.untyped.MetaClass")) thisSubConfig = datasetConfiguration.(thisField); if any(strcmp(superclassNames, "types.untyped.GroupClass")) - % Recursively process subgroups + % Recursively process subgroups. datasetConfiguration.(thisField) = ... io.config.internal.applyCustomMatNWBPropertyNames(thisSubConfig); elseif any(strcmp(superclassNames, "types.untyped.DatasetClass")) - % MatNWB adds a "data" property on Dataset type classes, - % which is not originally part of the schema. + % Wrap Dataset type configurations in a struct with a "data" field. datasetConfiguration.(thisField) = struct('data', thisSubConfig); else error('NWB:UnexpectedError', 'Something unexpected happened.') end else - % Do nothing. + % For non-NWB types, leave the field unmodified. end end end function ancestorPath = getAncestorPath(initialPath, numSteps) +% getAncestorPath - Get an ancestor directory path. +% +% ancestorPath = GETANCESTORPATH(initialPath, numSteps) +% +% Input: +% initialPath - A string representing the starting file or directory path. +% numSteps - A positive integer indicating the number of directory +% levels to move up. +% +% Output: +% ancestorPath - A string representing the ancestor directory path. + arguments initialPath (1,1) string numSteps (1,1) double @@ -44,30 +80,51 @@ splitPath = split(initialPath, filesep); ancestorPath = fullfile(splitPath{1:end-numSteps}); % char output + + % Ensure the path starts with a file separator on Unix systems. if isunix && ~startsWith(ancestorPath, filesep) ancestorPath = [filesep ancestorPath]; end end function map = getNwbTypesClassnameMap() +% getNwbTypesClassnameMap - Constructs a mapping between NWB type short names +% and their fully qualified class names. +% +% map = GETNWBTYPESCLASSNAMEMAP() +% +% The function locates the directory containing NWB type definitions +% (using the location of 'types.core.NWBFile' as a reference) and searches +% recursively for all MATLAB class definition files (*.m). It then filters +% out files in the '+types/+untyped' and '+types/+util' folders. +% +% Output: +% map - A mapping object (either a dictionary or containers.Map) where: +% * Keys : Short class names (derived from file names without the .m extension). +% * Values : Fully qualified class names in the format "types.namespace.ClassName". typesClassDirectory = getAncestorPath( which('types.core.NWBFile'), 2 ); - % Find names of all nwb types: + % Find all MATLAB class files recursively within the directory. L = dir(fullfile(typesClassDirectory, '**', '*.m')); + + % Exclude files from the '+types/+untyped' and '+types/+util' directories. ignore = contains({L.folder}, fullfile('+types', '+untyped')) | ... contains({L.folder}, fullfile('+types', '+util')); L(ignore) = []; - + % Extract namespace and class names from the file paths. [~, namespaceNames] = fileparts({L.folder}); namespaceNames = string( strrep(namespaceNames, '+', '') ); classNames = string( strrep( {L.name}, '.m', '') ); - fullClassNames = compose("types.%s.%s", namespaceNames', classNames'); + % Compose fully qualified class names using the namespace and class name. 
+ fullClassNames = matnwb.common.composeFullClassName(namespaceNames, classNames); + + % Create a mapping from the short class names to the fully qualified class names. try - map = dictionary(classNames', fullClassNames); - catch % If older version of MATLAB + map = dictionary(classNames, fullClassNames); + catch % Fallback for older versions of MATLAB. map = containers.Map(classNames, fullClassNames); end -end \ No newline at end of file +end diff --git a/+matnwb/+common/composeFullClassName.m b/+matnwb/+common/composeFullClassName.m new file mode 100644 index 00000000..1c08a09a --- /dev/null +++ b/+matnwb/+common/composeFullClassName.m @@ -0,0 +1,9 @@ +function fullClassName = composeFullClassName(namespaceName, neurodataType) + arguments + namespaceName (:, 1) string + neurodataType (:, 1) string + end + + fullClassName = compose("types.%s.%s", namespaceName, neurodataType); + fullClassName = transpose(fullClassName); % Return as row vector +end From a986a95ff14ef1412f83a680317fa409c311aa5e Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 22 Feb 2025 20:54:10 +0100 Subject: [PATCH 16/19] Update listDatasetsOfNeurodataType.m Resolve name for dataset if the name field is missing --- +schemes/listDatasetsOfNeurodataType.m | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/+schemes/listDatasetsOfNeurodataType.m b/+schemes/listDatasetsOfNeurodataType.m index 1d5c6654..216f5045 100644 --- a/+schemes/listDatasetsOfNeurodataType.m +++ b/+schemes/listDatasetsOfNeurodataType.m @@ -33,8 +33,13 @@ for i = 1:numel(datasetMaps) if isKey(datasetMaps{i}, 'name') datasetNames(i) = datasetMaps{i}('name'); + elseif isKey(datasetMaps{i}, 'data_type_inc') + datasetNames(i) = lower( datasetMaps{i}('data_type_inc') ); + elseif isKey(datasetMaps{i}, 'data_type_def') + datasetNames(i) = lower( datasetMaps{i}('data_type_def') ); else keyboard + error('NWB:UnexpectedError', 'Something unexpected happened.') end end datasetNames(datasetNames=="") = []; From 9e7f242b9b65aaee873c6f2bfab3aadf9ca3c201 Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 22 Feb 2025 20:56:31 +0100 Subject: [PATCH 17/19] Fix compute chunk size Rename flexible dimension to "flex" Use product of fixed dimensions to compute size of flex dimension --- .../+internal/computeChunkSizeFromConfig.m | 77 +++++++++++++++---- .../cloud_dataset_configuration.json | 24 +++--- 2 files changed, 77 insertions(+), 24 deletions(-) diff --git a/+io/+config/+internal/computeChunkSizeFromConfig.m b/+io/+config/+internal/computeChunkSizeFromConfig.m index 4a6de83c..a9eec0e9 100644 --- a/+io/+config/+internal/computeChunkSizeFromConfig.m +++ b/+io/+config/+internal/computeChunkSizeFromConfig.m @@ -3,7 +3,7 @@ % This function determines the chunk size for a dataset based on the chunk % dimensions provided in the datasetConfig structure. It adjusts dimensions % according to rules: 'max' uses the dataset size, fixed numbers use their -% value, and 'null' calculates the dimension size to approximate the target +% value, and 'flex' calculates the dimension size to approximate the target % chunk size in bytes. 
% % Inputs: @@ -28,18 +28,47 @@ dataSize = fliplr(dataSize); % matnwb quirk numDimensions = numel(dataSize); - % Extract relevant configuration parameters + % Extract chunk dimensions configuration chunkDimensions = datasetConfig.chunk_dimensions; - if iscell(chunkDimensions) - numChunkDimensions = cellfun(@numel, chunkDimensions); + if ~iscell(chunkDimensions) + if isscalar(chunkDimensions) + chunkDimensions = {chunkDimensions}; + else + error('Unexpected chunk_dimensions format.'); + end + end + + % Find the chunk dimensions specification matching the number of + % dimensions of the input array A + numChunkDimensions = cellfun(@numel, chunkDimensions); + if any(ismember(numChunkDimensions, numDimensions)) chunkDimensions = chunkDimensions{numChunkDimensions == numDimensions}; + elseif all(numDimensions > numChunkDimensions) + chunkDimensions = chunkDimensions{end}; + else + error('NWB:UnexpectedError', 'Unexpected chunk dimension size.') + end + + if ~iscell(chunkDimensions) + chunkDimensions = arrayfun(@(x) x, chunkDimensions, 'UniformOutput', false); end defaultChunkSize = datasetConfig.target_chunk_size.value; % in bytes dataByteSize = io.config.internal.getDataByteSize(A); + elementSize = io.config.internal.getDataByteSize(A) / numel(A); % bytes per element + + % Determine the target number of elements per chunk. + targetNumElements = defaultChunkSize / elementSize; + % Initialize chunk size array chunkSize = zeros(1, numDimensions); + flexDims = false(1, numDimensions); + + assert(iscell(chunkDimensions), "Something unexpected happened") + + isFlex = @(x) ischar(x) && strcmp(x, 'flex'); + isMax = @(x) ischar(x) && strcmp(x, 'max'); % Calculate chunk size for each dimension for dim = 1:numDimensions @@ -48,18 +77,12 @@ chunkSize(dim) = dataSize(dim); else dimSpec = chunkDimensions{dim}; - if isempty(dimSpec) - % Compute chunk size for 'null' dimensions - % Estimate proportional size based on remaining chunk size - remainingChunkSize = defaultChunkSize / dataByteSize; % scale factor for all dimensions - nullDimensions = find(cellfun(@isempty, chunkDimensions)); - proportionalSize = nthroot(remainingChunkSize, numel(nullDimensions)); - chunkSize(dim) = max(1, round(proportionalSize*dataSize(dim))); + if isFlex(dimSpec) + flexDims(dim) = true; + % Leave chunkSize(dim) to be determined. elseif isnumeric(dimSpec) - % Fixed chunk size chunkSize(dim) = dimSpec; - elseif ischar(dimSpec) && strcmp(dimSpec, 'max') - % Use full dimension size + elseif isMax(dimSpec) chunkSize(dim) = dataSize(dim); else error('Invalid chunk specification for dimension %d.', dim); @@ -67,7 +90,31 @@ end end + % Compute the product of fixed dimensions (number of elements per chunk). + if any(~flexDims) + fixedProduct = prod(chunkSize(~flexDims)); + else + fixedProduct = 1; + end + + % For flex dimensions, compute the remaining number of elements + % and allocate them equally in the exponent space. + nFlex = sum(flexDims); + if nFlex > 0 + remainingElements = targetNumElements / fixedProduct; + % Ensure remainingElements is at least 1. + remainingElements = max(remainingElements, 1); + % Compute an equal allocation factor for each flex dimension. + elementsPerFlexDimension = nthroot(remainingElements, nFlex); + % Assign computed chunk size for each flex dimension. + for dim = find(flexDims) + proposedSize = max(1, round(elementsPerFlexDimension)); + % Do not exceed the full dimension size. 
+ chunkSize(dim) = min(proposedSize, dataSize(dim)); + end + end + % Ensure chunk size does not exceed dataset dimensions - chunkSize = min(chunkSize, dataSize); chunkSize = fliplr(chunkSize); + chunkSize = min(chunkSize, dataSize); end diff --git a/configuration/cloud_dataset_configuration.json b/configuration/cloud_dataset_configuration.json index b410b2fc..d162cd8c 100644 --- a/configuration/cloud_dataset_configuration.json +++ b/configuration/cloud_dataset_configuration.json @@ -6,10 +6,10 @@ "unit": "bytes" }, "chunk_dimensions": [ - [null], - [null, "max"], - [null, "max", "max"], - [null, "max", "max", "max"] + ["flex"], + ["flex", "max"], + ["flex", "max", "max"], + ["flex", "max", "max", "max"] ], "compression": { "algorithm": "deflate", @@ -18,26 +18,32 @@ "prefilters": ["shuffle"] } }, + "VectorData": { + "compression": { + "algorithm": "deflate", + "level": 7 + } + }, "TimeSeries": { "data": { - "chunk_dimensions": [[null, 32], [null, 32, "max"]], + "chunk_dimensions": [["flex", 32], ["flex", 32, "max"]], "compression": { "algorithm": "deflate", "level": 4 } }, "timestamps": { - "chunk_dimensions": [null] + "chunk_dimensions": ["flex"] } }, "ImageSeries": { "data": { - "chunk_dimensions": [[null, "max", "max"], [null, "max", "max", "max"]] + "chunk_dimensions": [["flex", "max", "max"], ["flex", "max", "max", "max"]] } }, "ElectricalSeries": { "data": { - "chunk_dimensions": [[null, 32], [null, 32, "max"]] + "chunk_dimensions": [["flex", 64], ["flex", 64, "max"]] } }, "SpikeEventSeries": { @@ -45,4 +51,4 @@ "chunk_dimensions": [1000] } } -} \ No newline at end of file +} From 58698fe3f2ca24dd8e67aee1d52c40ef6c7e806b Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 22 Feb 2025 20:58:02 +0100 Subject: [PATCH 18/19] Update readDatasetConfiguration.m Add function to update dataset configuration to conform with MatNWB specific implementation (i.e, Dataset types (like VectorData) having a data property) --- +io/+config/readDatasetConfiguration.m | 2 ++ 1 file changed, 2 insertions(+) diff --git a/+io/+config/readDatasetConfiguration.m b/+io/+config/readDatasetConfiguration.m index 4a05e8b6..38c44ead 100644 --- a/+io/+config/readDatasetConfiguration.m +++ b/+io/+config/readDatasetConfiguration.m @@ -34,4 +34,6 @@ configFilePath = fullfile(misc.getMatnwbDir, 'configuration', filename); datasetConfig = jsondecode(fileread(configFilePath)); + + datasetConfig = io.config.internal.applyCustomMatNWBPropertyNames(datasetConfig); end From 560b501bba2a9a17318c5e48b067ce358bff802c Mon Sep 17 00:00:00 2001 From: ehennestad Date: Sat, 22 Feb 2025 20:58:27 +0100 Subject: [PATCH 19/19] Update resolveDatasetConfigForDataType.m --- +io/+config/+internal/resolveDatasetConfigForDataType.m | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/+io/+config/+internal/resolveDatasetConfigForDataType.m b/+io/+config/+internal/resolveDatasetConfigForDataType.m index 402ba0a1..b058519d 100644 --- a/+io/+config/+internal/resolveDatasetConfigForDataType.m +++ b/+io/+config/+internal/resolveDatasetConfigForDataType.m @@ -1,8 +1,9 @@ function resolvedOptions = resolveDatasetConfigForDataType(datasetConfig, nwbObject, datasetName) % resolveDatasetConfigForDataType - Resolve the dataset configuration for individual neurodata types -% This function resolves the dataset configuration options for a given NWB object -% by traversing the object hierarchy and combining options from the most specific -% type to the base type, as defined in the datasetConfig structure. 
+% This function resolves the dataset configuration options for a given NWB +% object by traversing the object hierarchy and combining options from the +% most specific type to the base type, as defined in the datasetConfig +% structure. % % Input: % datasetConfig (struct): A struct representation of the dataset configuration JSON.
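
A minimal usage sketch of the configuration machinery introduced in the patches above, for orientation only: the "cloud" profile, the array shape, and the literal 1 MB target are example choices, and the calls assume the generated NWB type classes are on the path and that the functions behave as shown in the diffs.

% Profile names map directly onto file names, so "cloud" resolves to
% configuration/cloud_dataset_configuration.json.
cloudConfig = io.config.readDatasetConfiguration("cloud");

% Dataset-type entries such as VectorData are rewrapped by
% applyCustomMatNWBPropertyNames, so their options sit under a "data" field,
% mirroring the extra "data" property MatNWB adds to Dataset type classes.
disp(cloudConfig.VectorData.data.compression.algorithm)  % "deflate" in the cloud profile

% Chunk sizes are derived from the resolved options. The target chunk size is
% supplied literally here; in normal use it would be resolved from the profile
% (see resolveDatasetConfigForDataType). Dimension order in chunk_dimensions
% follows the flipped (HDF5-style) convention noted in computeChunkSizeFromConfig.
A = rand(10000, 64);
resolvedConfig = struct( ...
    'chunk_dimensions', {cloudConfig.ElectricalSeries.data.chunk_dimensions}, ...
    'target_chunk_size', struct('value', 1e6, 'unit', 'bytes'));
chunkSize = io.config.internal.computeChunkSizeFromConfig(A, resolvedConfig);
disp(chunkSize)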