Append a Dataset of References #1135

Status: Merged (39 commits, merged Aug 22, 2024)

Commits
891fd95  checkpoint  (mavaylon1, Jun 27, 2024)
3b81034  [pre-commit.ci] auto fixes from pre-commit.com hooks  (pre-commit-ci[bot], Jun 27, 2024)
285dd75  checkpoint  (mavaylon1, Jul 2, 2024)
a9a6cc0  Merge branch 'dev' into zarr_append  (mavaylon1, Jul 2, 2024)
e15359c  checkpoint  (mavaylon1, Jul 2, 2024)
e2b9057  checkpoint  (mavaylon1, Jul 2, 2024)
a335322  check point  (mavaylon1, Jul 8, 2024)
88ac84b  check point  (mavaylon1, Jul 8, 2024)
6cf4276  Merge branch 'dev' into zarr_append  (mavaylon1, Jul 8, 2024)
f5343e7  clean up  (mavaylon1, Jul 8, 2024)
e71f577  Update CHANGELOG.md  (mavaylon1, Jul 8, 2024)
c4623ad  test work in progress  (mavaylon1, Jul 8, 2024)
d816311  work in progress  (mavaylon1, Jul 8, 2024)
fc464cc  checkpoint  (mavaylon1, Jul 10, 2024)
c5d69d1  remove breakpoint  (mavaylon1, Jul 10, 2024)
638c38c  coverage  (mavaylon1, Jul 10, 2024)
ecf692f  coverage  (mavaylon1, Jul 10, 2024)
91ecd31  Merge branch 'dev' into zarr_append  (mavaylon1, Jul 10, 2024)
fde2bfb  clean up  (mavaylon1, Jul 10, 2024)
19b515b  external link  (mavaylon1, Jul 10, 2024)
3a2b716  Update install_users.rst  (mavaylon1, Jul 10, 2024)
f505321  Update objectmapper.py  (mavaylon1, Jul 12, 2024)
29a9c78  Update install_users.rst  (mavaylon1, Jul 13, 2024)
40e051e  Update install_developers.rst  (mavaylon1, Jul 13, 2024)
d3f7a21  clean  (mavaylon1, Jul 22, 2024)
90c4296  clean  (mavaylon1, Jul 22, 2024)
27bb840  Add reference check  (rly, Jul 25, 2024)
dcea8a0  Merge branch 'dev' into zarr_append  (mavaylon1, Jul 28, 2024)
a1ad5b1  Merge branch 'dev' into zarr_append  (mavaylon1, Aug 19, 2024)
8f9777e  poc  (mavaylon1, Aug 21, 2024)
07d879f  clean up  (mavaylon1, Aug 21, 2024)
f7c5bc4  Merge branch 'dev' into zarr_append  (mavaylon1, Aug 21, 2024)
af8a26f  Update CHANGELOG.md  (mavaylon1, Aug 21, 2024)
d43401e  Update manager.py  (mavaylon1, Aug 21, 2024)
c942a7d  labels  (mavaylon1, Aug 21, 2024)
568d8c1  Update query.py  (mavaylon1, Aug 22, 2024)
5f89070  Update src/hdmf/backends/hdf5/h5_utils.py  (mavaylon1, Aug 22, 2024)
f005e32  Merge branch 'dev' into zarr_append  (mavaylon1, Aug 22, 2024)
d5ad0e4  tst cov  (mavaylon1, Aug 22, 2024)
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@
 - Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166)
 - Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165)
 - Added support to write multidimensional string arrays. @stephprince [#1173](https://github.com/hdmf-dev/hdmf/pull/1173)
+- Add support for appending to a dataset of references. @mavaylon1 [#1135](https://github.com/hdmf-dev/hdmf/pull/1135)
 
 ### Bug fixes
 - Fixed issue where scalar datasets with a compound data type were being written as non-scalar datasets @stephprince [#1176](https://github.com/hdmf-dev/hdmf/pull/1176)
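For context, a minimal usage sketch of the feature this PR adds. It mirrors the test added below and assumes the `Baz`/`BazData`/`BazBucket` classes and the `get_baz_buildmanager` helper from the HDMF test suite (the import path shown is an assumption, adjust to your checkout); the key point is wrapping the data in `H5DataIO(..., maxshape=(None,))` so the dataset of references is created resizable:

```python
import numpy as np

from hdmf.backends.hdf5 import HDF5IO, H5DataIO
# Test fixtures from the HDMF test suite; this import path is assumed, not public API.
from tests.unit.helpers.utils import Baz, BazData, BazBucket, get_baz_buildmanager

# Write a resizable dataset of references (maxshape=(None,) allows later appends).
baz = Baz(name='baz0')
baz_data = BazData(name='baz_data1', data=H5DataIO(np.array([baz]), maxshape=(None,)))
bucket = BazBucket(name='bucket1', bazs=[baz], baz_data=baz_data)
with HDF5IO('demo.h5', manager=get_baz_buildmanager(), mode='w') as io:
    io.write(bucket)

# The target container must be written to the file before it can be referenced.
with HDF5IO('demo.h5', manager=get_baz_buildmanager(), mode='a') as io:
    bucket = io.read()
    bucket.add_baz(Baz(name='new'))
    io.write(bucket)

# Re-open the modified file and append a reference to the newly written container.
with HDF5IO('demo.h5', manager=get_baz_buildmanager(), mode='a') as io:
    bucket = io.read()
    bucket.baz_data.data.append(bucket.bazs['new'])
```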
2 changes: 1 addition & 1 deletion docs/source/install_developers.rst
@@ -73,7 +73,7 @@ environment by using the ``conda remove --name hdmf-venv --all`` command.
 For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager
 that includes conda-forge as a default channel.
 
-.. _Anaconda: https://www.anaconda.com/products/distribution
+.. _Anaconda: https://www.anaconda.com/download
 .. _Mambaforge: https://github.com/conda-forge/miniforge
 
 Install from GitHub
2 changes: 1 addition & 1 deletion docs/source/install_users.rst
@@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a
 
     conda install -c conda-forge hdmf
 
-.. _Anaconda Distribution: https://www.anaconda.com/products/distribution
+.. _Anaconda Distribution: https://www.anaconda.com/download
16 changes: 15 additions & 1 deletion src/hdmf/backends/hdf5/h5_utils.py
@@ -17,7 +17,7 @@
 import logging
 
 from ...array import Array
-from ...data_utils import DataIO, AbstractDataChunkIterator
+from ...data_utils import DataIO, AbstractDataChunkIterator, append_data
 from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver
 from ...region import RegionSlicer
 from ...spec import SpecWriter, SpecReader
@@ -108,6 +108,20 @@ def ref(self):
     def shape(self):
         return self.dataset.shape
 
+    def append(self, arg):
+        # Get Builder
+        builder = self.io.manager.get_builder(arg)
+        if builder is None:
+            raise ValueError(
+                "The container being appended to the dataset has not yet been built. "
+                "Please write the container to the file, then open the modified file, and "
+                "append the read container to the dataset."
+            )
+
+        # Get HDF5 Reference
+        ref = self.io._create_ref(builder)
+        append_data(self.dataset, ref)
+
 
 class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta):
     """
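The `append` method above delegates the actual write to `append_data` from `hdmf.data_utils`. For an h5py dataset that was created resizable, the net effect is a resize-and-assign; a simplified sketch of that behavior (not the actual dispatch logic inside HDMF):

```python
import h5py


def append_ref_sketch(dataset: h5py.Dataset, ref: h5py.Reference) -> None:
    """Grow a resizable dataset of references by one element and store the new reference."""
    # Requires the dataset to have been created with maxshape=(None,), which is why the
    # tests below wrap the data in H5DataIO(..., maxshape=(None,)).
    dataset.resize((dataset.shape[0] + 1,) + dataset.shape[1:])
    dataset[-1] = ref
```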
9 changes: 9 additions & 0 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -1518,6 +1518,7 @@ def __get_ref(self, **kwargs):
         self.logger.debug("Getting reference for %s '%s'" % (container.__class__.__name__, container.name))
         builder = self.manager.build(container)
         path = self.__get_path(builder)
+
         self.logger.debug("Getting reference at path '%s'" % path)
         if isinstance(container, RegionBuilder):
             region = container.region
@@ -1529,6 +1530,14 @@
         else:
             return self.__file[path].ref
 
+    @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference',
+             'default': None},
+            {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object',
+             'default': None},
+            returns='the reference', rtype=Reference)
+    def _create_ref(self, **kwargs):
+        return self.__get_ref(**kwargs)
+
     def __is_ref(self, dtype):
         if isinstance(dtype, DtypeSpec):
             return self.__is_ref(dtype.dtype)
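`_create_ref` is a small public wrapper around the private `__get_ref`, added so that `H5Dataset.append` (in `h5_utils.py` above) can request a reference from outside the class. Underneath, the reference is a plain h5py object reference; a minimal illustration of that mechanism using h5py directly (independent of HDMF):

```python
import h5py
import numpy as np

with h5py.File('refs_demo.h5', 'w') as f:
    target = f.create_dataset('target', data=np.arange(3))
    # A resizable dataset with the special object-reference dtype.
    refs = f.create_dataset('refs', shape=(1,), maxshape=(None,), dtype=h5py.ref_dtype)
    # Equivalent to what __get_ref returns via self.__file[path].ref.
    refs[0] = target.ref
    # Dereferencing goes back through the file handle.
    assert f[refs[0]].name == '/target'
```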
6 changes: 6 additions & 0 deletions src/hdmf/build/objectmapper.py
@@ -10,8 +10,11 @@
 from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError,
                      ConstructError)
 from .manager import Proxy, BuildManager
+
 from .warnings import (MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning,
                        IncorrectDatasetShapeBuildWarning)
+from hdmf.backends.hdf5.h5_utils import H5DataIO
+
 from ..container import AbstractContainer, Data, DataRegion
 from ..term_set import TermSetWrapper
 from ..data_utils import DataIO, AbstractDataChunkIterator
@@ -978,6 +981,9 @@ def __get_ref_builder(self, builder, dtype, shape, container, build_manager):
             for d in container.data:
                 target_builder = self.__get_target_builder(d, build_manager, builder)
                 bldr_data.append(ReferenceBuilder(target_builder))
+            if isinstance(container.data, H5DataIO):
+                # This is here to support appending a dataset of references.
+                bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
         else:
             self.logger.debug("Setting %s '%s' data to reference builder"
                               % (builder.__class__.__name__, builder.name))
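The re-wrap in `__get_ref_builder` matters because the list of `ReferenceBuilder`s replaces the user's original `H5DataIO`-wrapped array in the builder; copying the I/O parameters forward keeps settings such as `maxshape=(None,)` (along with chunking, compression, and so on), so the written dataset of references stays resizable and therefore appendable. A hedged sketch of the idea in isolation, with placeholder data standing in for the real builders:

```python
from hdmf.backends.hdf5 import H5DataIO

# The user wraps the raw data so the dataset of references is created resizable.
wrapped = H5DataIO(data=[1, 2, 3], maxshape=(None,))

# During the build step the data are converted into a list of ReferenceBuilders
# (represented here by a placeholder list); re-wrapping with the original I/O
# parameters carries maxshape and the other HDF5 dataset options over.
reference_builders = ['<ReferenceBuilder 0>', '<ReferenceBuilder 1>', '<ReferenceBuilder 2>']
rewrapped = H5DataIO(data=reference_builders, **wrapped.get_io_params())
```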
6 changes: 6 additions & 0 deletions src/hdmf/query.py
@@ -163,6 +163,12 @@ def __next__(self):
     def next(self):
         return self.dataset.next()
 
+    def append(self, arg):
+        """
+        Override this method to support appending to backend-specific datasets
+        """
+        pass  # pragma: no cover
+
 
 class ReferenceResolver(metaclass=ABCMeta):
     """
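`HDMFDataset.append` is deliberately a no-op hook: each backend that supports appending overrides it, as the HDF5 backend does in `h5_utils.py` above. A purely hypothetical sketch of what another backend's override could look like; the class and its storage here are illustrative only, not actual HDMF API:

```python
from hdmf.query import HDMFDataset


class ListBackedDatasetOfReferences(HDMFDataset):
    """Hypothetical dataset whose backing storage is a plain Python list."""

    def append(self, arg):
        # A real backend would resolve ``arg`` to a backend-specific reference
        # (see H5Dataset.append, which builds an HDF5 object reference first);
        # in this toy example the "reference" is just the container itself.
        self.dataset.append(arg)
```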
51 changes: 51 additions & 0 deletions tests/unit/test_io_hdf5_h5tools.py
@@ -3004,6 +3004,57 @@ def test_append_data(self):
         self.assertEqual(f['foofile_data'].file.filename, self.paths[1])
         self.assertIsInstance(f.attrs['foo_ref_attr'], h5py.Reference)
 
+    def test_append_dataset_of_references(self):
+        """Test appending a new reference to an existing, resizable dataset of references."""
+        bazs = []
+        num_bazs = 1
+        for i in range(num_bazs):
+            bazs.append(Baz(name='baz%d' % i))
+        array_bazs = np.array(bazs)
+        wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,))
+        baz_data = BazData(name='baz_data1', data=wrapped_bazs)
+        bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io:
+            write_io.write(bucket)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io:
+            read_bucket1 = append_io.read()
+            new_baz = Baz(name='new')
+            read_bucket1.add_baz(new_baz)
+            append_io.write(read_bucket1)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io:
+            read_bucket1 = ref_io.read()
+            DoR = read_bucket1.baz_data.data
+            DoR.append(read_bucket1.bazs['new'])
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io:
+            read_bucket1 = read_io.read()
+            self.assertEqual(len(read_bucket1.baz_data.data), 2)
+            self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"])
+
+    def test_append_dataset_of_references_orphaned_target(self):
+        bazs = []
+        num_bazs = 1
+        for i in range(num_bazs):
+            bazs.append(Baz(name='baz%d' % i))
+        array_bazs = np.array(bazs)
+        wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,))
+        baz_data = BazData(name='baz_data1', data=wrapped_bazs)
+        bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io:
+            write_io.write(bucket)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io:
+            read_bucket1 = ref_io.read()
+            new_baz = Baz(name='new')
+            read_bucket1.add_baz(new_baz)
+            DoR = read_bucket1.baz_data.data
+            # The new Baz has not been written to the file yet, so appending a reference to it must fail.
+            with self.assertRaises(ValueError):
+                DoR.append(read_bucket1.bazs['new'])
+
     def test_append_external_link_data(self):
         """Test that exporting a written container after adding a link with link_data=True creates external links."""
         foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)