make untyped dataset of references expandable #1188

Merged · 6 commits · Oct 23, 2024
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,10 @@
# HDMF Changelog

## HDMF 3.14.6 (Upcoming)

### Enhancements
- Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188)

## HDMF 3.14.5 (October 6, 2024)

### Enhancements
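In user-facing terms, the enhancement means a plain (untyped) dataset of object references can be wrapped in `H5DataIO` with `maxshape=(None,)` and grown after a round trip. A condensed sketch of the write side, using the `Baz`/`BazData`/`BazBucket` fixtures that the tests later in this diff rely on (the fixture import path points at hdmf's own test helpers and is illustrative):

```python
import numpy as np

from hdmf.backends.hdf5 import HDF5IO, H5DataIO
# Test fixtures used by the tests in this PR; module path is illustrative.
from tests.unit.helpers.utils import Baz, BazData, BazBucket, get_baz_buildmanager

bazs = [Baz(name='baz%d' % i) for i in range(3)]
# maxshape=(None,) leaves the first dimension of the written HDF5 dataset
# unlimited, so the dataset of references can be appended to later.
baz_data = BazData(name='baz_data1', data=H5DataIO(np.array(bazs), maxshape=(None,)))
bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data)

with HDF5IO('bucket.h5', manager=get_baz_buildmanager(), mode='w') as write_io:
    write_io.write(bucket)
# Appending after the round trip follows the pattern in the tests below.
```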
4 changes: 0 additions & 4 deletions docs/source/conf.py
@@ -163,16 +163,12 @@
# html_theme = 'default'
# html_theme = "sphinxdoc"
html_theme = "sphinx_rtd_theme"
-html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

-# Add any paths that contain custom themes here, relative to this directory.
-# html_theme_path = []
-
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
# html_title = None
6 changes: 6 additions & 0 deletions src/hdmf/build/objectmapper.py
@@ -934,6 +934,9 @@ def _filler():
for j, subt in refs:
tmp[j] = self.__get_ref_builder(builder, subt.dtype, None, row[j], build_manager)
bldr_data.append(tuple(tmp))
if isinstance(container.data, H5DataIO):
# This is here to support appending a dataset of references.
bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
builder.data = bldr_data

return _filler
@@ -952,6 +955,9 @@ def _filler():
else:
target_builder = self.__get_target_builder(d, build_manager, builder)
bldr_data.append(ReferenceBuilder(target_builder))
if isinstance(container.data, H5DataIO):
# This is here to support appending a dataset of references.
bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
builder.data = bldr_data

return _filler
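Both hunks apply the same fix in the two `_filler` branches: if the container's data arrived wrapped in `H5DataIO`, the freshly built list of reference builders is re-wrapped with the original I/O parameters, so settings like `maxshape=(None,)` survive the build step and the written dataset stays expandable. A minimal sketch of what the re-wrap preserves (assuming, as the code above does, that `get_io_params` returns the keyword arguments the wrapper was constructed with):

```python
from hdmf.backends.hdf5 import H5DataIO

# What the user originally wrapped their dataset of references in.
original = H5DataIO(data=[0, 1, 2], maxshape=(None,), chunks=True)

# The object mapper replaces the data with ReferenceBuilders; re-wrapping
# with the original parameters carries maxshape/chunks over to the new wrapper.
bldr_data = ['ref0', 'ref1', 'ref2']  # stand-ins for ReferenceBuilder objects
rewrapped = H5DataIO(bldr_data, **original.get_io_params())

print(original.get_io_params())  # includes maxshape=(None,) and chunks=True
```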
51 changes: 51 additions & 0 deletions tests/unit/common/test_table.py
@@ -2852,6 +2852,57 @@ def test_dtr_references(self):
pd.testing.assert_frame_equal(ret, expected)


class TestDataIOReferences(H5RoundTripMixin, TestCase):

def setUpContainer(self):
"""Test roundtrip of a table with an expandable column of references."""
group1 = Container('group1')
group2 = Container('group2')

table = DynamicTable(
name='table',
description='test table'
)
table.add_column(
name='x',
description='test column of ints'
)
table.add_column(
name='y',
description='test column of reference'
)
table.add_row(id=101, x=1, y=group1)
table.add_row(id=102, x=2, y=group2)
table.id.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True})
table.x.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True})
table.y.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True})

multi_container = SimpleMultiContainer(name='multi')
multi_container.add_container(group1)
multi_container.add_container(group2)
multi_container.add_container(table)

return multi_container

def test_append(self, cache_spec=False):
"""Write the container to an HDF5 file, read the container from the file, and append to it."""

# write file
with HDF5IO(self.filename, manager=get_manager(), mode='w') as write_io:
write_io.write(self.container, cache_spec=cache_spec)

# read container from file
self.reader = HDF5IO(self.filename, manager=get_manager(), mode='a')
read_container = self.reader.read()
self.assertContainerEqual(read_container, self.container, ignore_name=True)
self.assertContainerEqual(read_container['table']['y'][-1], read_container['group2'])

# append row
group1 = read_container['group1']
read_container['table'].add_row(id=103, x=3, y=group1)

self.assertContainerEqual(read_container['table']['y'][-1], group1)

class TestVectorIndexDtype(TestCase):

def set_up_array_index(self):
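The round-trip test above attaches the wrapper after construction via `set_data_io`; the same effect can be achieved by wrapping a column's data up front. A short sketch under the same assumptions (the `VectorData` kwargs mirror hdmf's common API; the empty initial data is illustrative):

```python
from hdmf.backends.hdf5 import H5DataIO
from hdmf.common import VectorData

# Equivalent to calling set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True})
# after construction: wrap the column data at creation time instead.
y = VectorData(
    name='y',
    description='test column of references',
    data=H5DataIO(data=[], maxshape=(None,), chunks=True),
)
```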
37 changes: 36 additions & 1 deletion tests/unit/test_io_hdf5_h5tools.py
@@ -21,7 +21,7 @@
from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder
from hdmf.container import Container
from hdmf import Data, docval
-from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError
+from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError, append_data
from hdmf.spec.catalog import SpecCatalog
from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace
from hdmf.spec.spec import GroupSpec, DtypeSpec
@@ -3040,6 +3040,41 @@ def test_append_dataset_of_references(self):
self.assertEqual(len(read_bucket1.baz_data.data), 2)
self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"])

def test_append_dataset_of_references_compound(self):
"""Test that exporting a written container with a dataset of references of compound data type works."""
bazs = []
baz_pairs = []
num_bazs = 10
for i in range(num_bazs):
b = Baz(name='baz%d' % i)
bazs.append(b)
baz_pairs.append((i, b))
baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=H5DataIO(baz_pairs, maxshape=(None,)))
bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data)

with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io:
write_io.write(bucket)

with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io:
read_bucket1 = append_io.read()
new_baz = Baz(name='new')
read_bucket1.add_baz(new_baz)
append_io.write(read_bucket1)

with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io:
read_bucket1 = ref_io.read()
cpd_DoR = read_bucket1.baz_cpd_data.data
builder = ref_io.manager.get_builder(read_bucket1.bazs['new'])
ref = ref_io._create_ref(builder)
append_data(cpd_DoR.dataset, (11, ref))

with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io:
read_bucket2 = read_io.read()

self.assertEqual(read_bucket2.baz_cpd_data.data[-1][0], 11)
self.assertIs(read_bucket2.baz_cpd_data.data[-1][1], read_bucket2.bazs['new'])


def test_append_dataset_of_references_orphaned_target(self):
bazs = []
num_bazs = 1
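For orientation, the manual `append_data` call in the compound test works only because the compound dataset was created with an unlimited `maxshape`: on an `h5py.Dataset`, appending grows the first axis by one and writes the new element at the end. Roughly (a sketch of the behavior the test depends on, not hdmf's exact implementation):

```python
import h5py

def append_row(dset: h5py.Dataset, value) -> None:
    """Grow an expandable dataset along axis 0 and write `value` at the end."""
    shape = list(dset.shape)
    shape[0] += 1
    dset.resize(tuple(shape))  # requires the dataset's maxshape[0] to be None
    dset[-1] = value
```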