From b1cfdf3deb8b8b5c353c85df512e5052cb937332 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 23 Aug 2023 14:20:30 -0700 Subject: [PATCH 01/12] Add missing TimeSeriesMap.data_attr function to link data between timeseries --- src/pynwb/io/base.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/pynwb/io/base.py b/src/pynwb/io/base.py index 5b5aac48b..31e206a7a 100644 --- a/src/pynwb/io/base.py +++ b/src/pynwb/io/base.py @@ -73,6 +73,20 @@ def timestamps_carg(self, builder, manager): else: return tstamps_builder.data + + @NWBContainerMapper.object_attr("data") + def data_attr(self, container, manager): + ret = container.fields.get('data') + if isinstance(ret, TimeSeries): + owner = ret + curr = owner.fields.get('data') + while isinstance(curr, TimeSeries): + owner = curr + curr = owner.fields.get('data') + data_builder = manager.build(owner) + ret = LinkBuilder(data_builder['data'], 'data') + return ret + @NWBContainerMapper.constructor_arg("data") def data_carg(self, builder, manager): # handle case where a TimeSeries is read and missing data @@ -105,7 +119,10 @@ def unit_carg(self, builder, manager): data_builder = manager.construct(target.parent) else: data_builder = target - unit_value = data_builder.attributes.get('unit') + if isinstance(data_builder, TimeSeries): # Data linked in another timeseries + unit_value = data_builder.unit + else: # DatasetBuilder owned by this timeseries + unit_value = data_builder.attributes.get('unit') if unit_value is None: return timeseries_cls.DEFAULT_UNIT return unit_value From 9667eaa6e7201cf9c604b7b7903085ac86cdbe1a Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 23 Aug 2023 14:26:36 -0700 Subject: [PATCH 02/12] Fix flake8 --- src/pynwb/io/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pynwb/io/base.py b/src/pynwb/io/base.py index 31e206a7a..db9c259ef 100644 --- a/src/pynwb/io/base.py +++ b/src/pynwb/io/base.py @@ -73,7 +73,6 @@ def timestamps_carg(self, builder, manager): else: return tstamps_builder.data - @NWBContainerMapper.object_attr("data") def data_attr(self, container, manager): ret = container.fields.get('data') From a2abc0ee2d36f60c02c7dfb77416395703677b61 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 23 Aug 2023 14:29:44 -0700 Subject: [PATCH 03/12] Updated CHANGELOG --- CHANGELOG.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9016dbb2e..4206693ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,15 @@ # PyNWB Changelog +## PyNWB 2.5.1 (Upcoming) + +### Bug fixes +- Fixed bug to allow linking of `TimeSeries.data` by setting the `data` constructor argument to another `TimeSeries`. @oruebel [#1766](https://github.com/NeurodataWithoutBorders/pynwb/pull/1766) + ## PyNWB 2.5.0 (August 18, 2023) ### Enhancements and minor changes -- Add `TimeSeries.get_timestamps()`. @bendichter [#1741](https://github.com/NeurodataWithoutBorders/pynwb/pull/1741) -- Add `TimeSeries.get_data_in_units()`. @bendichter [#1745](https://github.com/NeurodataWithoutBorders/pynwb/pull/1745) +- Added `TimeSeries.get_timestamps()`. @bendichter [#1741](https://github.com/NeurodataWithoutBorders/pynwb/pull/1741) +- Added `TimeSeries.get_data_in_units()`. @bendichter [#1745](https://github.com/NeurodataWithoutBorders/pynwb/pull/1745) - Updated `ExternalResources` name change to `HERD`, along with HDMF 3.9.0 being the new minimum. @mavaylon1 [#1754](https://github.com/NeurodataWithoutBorders/pynwb/pull/1754) ### Documentation and tutorial enhancements @@ -17,15 +22,15 @@ ## PyNWB 2.4.0 (July 23, 2023) ### Enhancements and minor changes -- Add support for `ExternalResources`. @mavaylon1 [#1684](https://github.com/NeurodataWithoutBorders/pynwb/pull/1684) -- Update links for making a release. @mavaylon1 [#1720](https://github.com/NeurodataWithoutBorders/pynwb/pull/1720) +- Added support for `ExternalResources`. @mavaylon1 [#1684](https://github.com/NeurodataWithoutBorders/pynwb/pull/1684) +- Updated links for making a release. @mavaylon1 [#1720](https://github.com/NeurodataWithoutBorders/pynwb/pull/1720) ### Bug fixes - Fixed sphinx-gallery setting to correctly display index in the docs with sphinx-gallery>=0.11. @oruebel [#1733](https://github.com/NeurodataWithoutBorders/pynwb/pull/1733) ### Documentation and tutorial enhancements - Added thumbnail for Optogentics tutorial. @oruebel [#1729](https://github.com/NeurodataWithoutBorders/pynwb/pull/1729) -- Update and fix errors in tutorials. @bendichter @oruebel +- Updated and fixed errors in tutorials. @bendichter @oruebel ## PyNWB 2.3.3 (June 26, 2023) From 109be2b1ee11b79dd483af20928de3d4478ead58 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 23 Aug 2023 14:41:12 -0700 Subject: [PATCH 04/12] Add unit test for linking TimeSeries.data --- tests/integration/hdf5/test_base.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/integration/hdf5/test_base.py b/tests/integration/hdf5/test_base.py index 1e855f3ce..c27704c99 100644 --- a/tests/integration/hdf5/test_base.py +++ b/tests/integration/hdf5/test_base.py @@ -46,6 +46,23 @@ def test_timestamps_linking(self): tsb = nwbfile.acquisition['b'] self.assertIs(tsa.timestamps, tsb.timestamps) + def test_data_linking(self): + ''' Test that data get linked to in TimeSeries ''' + tsa = TimeSeries(name='a', data=np.linspace(0, 1, 1000), timestamps=np.arange(1000.), unit='m') + tsb = TimeSeries(name='b', data=tsa, timestamps=np.arange(1000.), unit='m') + nwbfile = NWBFile(identifier='foo', + session_start_time=datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()), + session_description='bar') + nwbfile.add_acquisition(tsa) + nwbfile.add_acquisition(tsb) + with NWBHDF5IO(self.path, 'w') as io: + io.write(nwbfile) + with NWBHDF5IO(self.path, 'r') as io: + nwbfile = io.read() + tsa = nwbfile.acquisition['a'] + tsb = nwbfile.acquisition['b'] + self.assertIs(tsa.data, tsb.data) + class TestImagesIO(AcquisitionH5IOMixin, TestCase): From caeb88951b443994d8cd8d38e335bf7f33b3a34f Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 23 Aug 2023 17:39:50 -0700 Subject: [PATCH 05/12] Update modular storage test to separate problematic behavior --- .../integration/hdf5/test_modular_storage.py | 56 ++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index 6c86fc615..1efcc210b 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -78,7 +78,10 @@ def roundtripContainer(self): self.link_container = TimeSeries( name='test_mod_ts', unit='V', - data=data_file_obt.get_acquisition('data_ts'), # test direct link + data=H5DataIO( + data=data_file_obt.get_acquisition('data_ts').data, + link_data=True # test with setting link data + ), timestamps=H5DataIO( data=data_file_obt.get_acquisition('data_ts').timestamps, link_data=True # test with setting link data @@ -159,3 +162,54 @@ def validate(self): def getContainer(self, nwbfile): return nwbfile.get_acquisition('test_mod_ts') + + +class TestTimeSeriesModularLinkViaTimeSeries(TestTimeSeriesModular): + """ + Same as TestTimeSeriesModular but creating links by setting TimeSeries.data + and TimeSeries.timestamps to the other TimeSeries on construction, rather than + using H5DataIO. + """ + def setUp(self): + super().setUp() + self.skipTest("This behavior is currently broken. See issue .") + + def roundtripContainer(self): + # create and write data file + data_file = NWBFile( + session_description='a test file', + identifier='data_file', + session_start_time=self.start_time + ) + data_file.add_acquisition(self.container) + + with HDF5IO(self.data_filename, 'w', manager=get_manager()) as data_write_io: + data_write_io.write(data_file) + + # read data file + with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io: + data_file_obt = self.data_read_io.read() + + # write "link file" with timeseries.data that is an external link to the timeseries in "data file" + # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" + with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: + link_file = NWBFile( + session_description='a test file', + identifier='link_file', + session_start_time=self.start_time + ) + self.link_container = TimeSeries( + name='test_mod_ts', + unit='V', + data=data_file_obt.get_acquisition('data_ts'), # link by setting data to an external TimeSeries + timestamps=data_file_obt.get_acquisition('data_ts'), # link by setting to an external TimeSeries + ) + link_file.add_acquisition(self.link_container) + link_write_io.write(link_file) + + # note that self.link_container contains a link to a dataset that is now closed + + # read the link file + self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager()) + self.read_nwbfile = self.link_read_io.read() + return self.getContainer(self.read_nwbfile) \ No newline at end of file From 2a7b4a8b6a591561366b326390644c4583504b23 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 23 Aug 2023 17:41:08 -0700 Subject: [PATCH 06/12] Fix flake8 --- tests/integration/hdf5/test_modular_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index 1efcc210b..c7892ae06 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -212,4 +212,4 @@ def roundtripContainer(self): # read the link file self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager()) self.read_nwbfile = self.link_read_io.read() - return self.getContainer(self.read_nwbfile) \ No newline at end of file + return self.getContainer(self.read_nwbfile) From e933c33fbc4a952c0447aa54d127191b5c920996 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 23 Aug 2023 18:06:31 -0700 Subject: [PATCH 07/12] Add link to relevant issue to the test suite --- tests/integration/hdf5/test_modular_storage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index c7892ae06..d16f4899e 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -172,7 +172,8 @@ class TestTimeSeriesModularLinkViaTimeSeries(TestTimeSeriesModular): """ def setUp(self): super().setUp() - self.skipTest("This behavior is currently broken. See issue .") + self.skipTest("This behavior is currently broken. " + "See issue https://github.com/NeurodataWithoutBorders/pynwb/issues/1767") def roundtripContainer(self): # create and write data file From 5272b1d6aa9a4e4b0e489cf7d1f9e97c0df6ffd6 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Thu, 24 Aug 2023 14:35:47 -0700 Subject: [PATCH 08/12] Fix #1767 Update test to ensure source file remains open when creating the link --- .../integration/hdf5/test_modular_storage.py | 185 +++++++++++------- 1 file changed, 109 insertions(+), 76 deletions(-) diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index d16f4899e..c510f4d7a 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -14,29 +14,35 @@ class TestTimeSeriesModular(TestCase): def setUp(self): - self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc()) + # File paths + self.data_filename = os.path.join(os.getcwd(), 'test_time_series_modular_data.nwb') + self.link_filename = os.path.join(os.getcwd(), 'test_time_series_modular_link.nwb') + # Make the data container file write + self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc()) self.data = np.arange(2000).reshape((1000, 2)) self.timestamps = np.linspace(0, 1, 1000) - + # The container before roundtrip self.container = TimeSeries( name='data_ts', unit='V', data=self.data, timestamps=self.timestamps ) + self.data_read_io = None # HDF5IO used for reading the main data file + self.read_data_nwbfile = None # The NWBFile read after roundtrip + self.read_data_container = None # self.container after rountrip - self.data_filename = os.path.join(os.getcwd(), 'test_time_series_modular_data.nwb') - self.link_filename = os.path.join(os.getcwd(), 'test_time_series_modular_link.nwb') - - self.read_container = None - self.link_read_io = None - self.data_read_io = None + # Variables for the second file which links the main data file + self.link_container = None # The container with the links before write + self.read_link_container = None # The container with the links after roundtrip + self.read_link_nwbfile = None # The NWBFile container containing link_container after roundtrip + self.link_read_io = None # HDF5IO use for reading the read_link_nwbfile def tearDown(self): - if self.read_container: - self.read_container.data.file.close() - self.read_container.timestamps.file.close() + if self.read_link_container: + self.read_link_container.data.file.close() + self.read_link_container.timestamps.file.close() if self.link_read_io: self.link_read_io.close() if self.data_read_io: @@ -64,52 +70,83 @@ def roundtripContainer(self): data_write_io.write(data_file) # read data file - with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io: - data_file_obt = self.data_read_io.read() - - # write "link file" with timeseries.data that is an external link to the timeseries in "data file" - # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" - with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: - link_file = NWBFile( - session_description='a test file', - identifier='link_file', - session_start_time=self.start_time - ) - self.link_container = TimeSeries( - name='test_mod_ts', - unit='V', - data=H5DataIO( - data=data_file_obt.get_acquisition('data_ts').data, - link_data=True # test with setting link data - ), - timestamps=H5DataIO( - data=data_file_obt.get_acquisition('data_ts').timestamps, - link_data=True # test with setting link data - ) - ) - link_file.add_acquisition(self.link_container) - link_write_io.write(link_file) + self.data_read_io = HDF5IO(self.data_filename, 'r', manager=get_manager()) + self.read_data_nwbfile = self.data_read_io.read() + self.read_data_container = self.read_data_nwbfile.get_acquisition('data_ts') - # note that self.link_container contains a link to a dataset that is now closed + # write "link file" with timeseries.data that is an external link to the timeseries in "data file" + # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" + with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: + link_file = NWBFile( + session_description='a test file', + identifier='link_file', + session_start_time=self.start_time + ) + self.link_container = TimeSeries( + name='test_mod_ts', + unit='V', + data=H5DataIO( + data=self.read_data_container.data, + link_data=True # test with setting link data + ), + timestamps=H5DataIO( + data=self.read_data_container.timestamps, + link_data=True # test with setting link data + ) + ) + link_file.add_acquisition(self.link_container) + link_write_io.write(link_file) # read the link file self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager()) - self.read_nwbfile = self.link_read_io.read() - return self.getContainer(self.read_nwbfile) + self.read_link_nwbfile = self.link_read_io.read() + return self.getContainer(self.read_link_nwbfile) def test_roundtrip(self): - self.read_container = self.roundtripContainer() - - # make sure we get a completely new object - self.assertIsNotNone(str(self.container)) # added as a test to make sure printing works + # Roundtrip the container + self.read_link_container = self.roundtripContainer() + + # 1. Make sure our containers are set correctly for the test + # 1.1: Make sure the container we read is not identical to the container we used for writing + self.assertNotEqual(id(self.link_container), id(self.read_link_container)) + self.assertNotEqual(id(self.container), id(self.read_data_container)) + # 1.2: Make sure the container we read is indeed the correct container we should use for testing + self.assertIs(self.read_link_nwbfile.objects[self.link_container.object_id], self.read_link_container) + self.assertIs(self.read_data_nwbfile.objects[self.container.object_id], self.read_data_container) + # 1.3: Make sure the object_ids of the container we wrote and read are the same + self.assertEqual(self.read_link_container.object_id, self.link_container.object_id) + self.assertEqual(self.read_data_container.object_id, self.container.object_id) + # 1.4: Make sure the object_ids between the source data and link data container are different + self.assertNotEqual(self.read_link_container.object_id, self.read_data_container.object_id) + + # Test that printing works for the source data and linked data container + self.assertIsNotNone(str(self.container)) + self.assertIsNotNone(str(self.read_data_container)) self.assertIsNotNone(str(self.link_container)) - self.assertIsNotNone(str(self.read_container)) - self.assertFalse(self.link_container.timestamps.valid) - self.assertTrue(self.read_container.timestamps.id.valid) - self.assertNotEqual(id(self.link_container), id(self.read_container)) - self.assertIs(self.read_nwbfile.objects[self.link_container.object_id], self.read_container) - self.assertContainerEqual(self.read_container, self.container, ignore_name=True, ignore_hdmf_attrs=True) - self.assertEqual(self.read_container.object_id, self.link_container.object_id) + self.assertIsNotNone(str(self.read_link_container)) + + # Test that timestamps and data are valid after write + self.assertTrue(self.read_link_container.timestamps.id.valid) + self.assertTrue(self.read_link_container.data.id.valid) + self.assertTrue(self.read_data_container.timestamps.id.valid) + self.assertTrue(self.read_data_container.data.id.valid) + + # Make sure the data in the read data container and linked data container match the original container + self.assertContainerEqual(self.read_link_container, self.container, ignore_name=True, ignore_hdmf_attrs=True) + self.assertContainerEqual(self.read_data_container, self.container, ignore_name=True, ignore_hdmf_attrs=True) + + # Make sure the timestamps and data are linked correctly. I.e., the filename of the h5py dataset should + # match between the data file and the file with links even-though they have been read from different files + self.assertEqual( + self.read_data_container.data.file.filename, # Path where the source data is stored + self.read_link_container.data.file.filename # Path where the linked h5py dataset points to + ) + self.assertEqual( + self.read_data_container.timestamps.file.filename, # Path where the source data is stored + self.read_link_container.timestamps.file.filename # Path where the linked h5py dataset points to + ) + + # validate both the source data and linked data file via the pynwb validator self.validate() def test_link_root(self): @@ -170,11 +207,6 @@ class TestTimeSeriesModularLinkViaTimeSeries(TestTimeSeriesModular): and TimeSeries.timestamps to the other TimeSeries on construction, rather than using H5DataIO. """ - def setUp(self): - super().setUp() - self.skipTest("This behavior is currently broken. " - "See issue https://github.com/NeurodataWithoutBorders/pynwb/issues/1767") - def roundtripContainer(self): # create and write data file data_file = NWBFile( @@ -188,29 +220,30 @@ def roundtripContainer(self): data_write_io.write(data_file) # read data file - with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io: - data_file_obt = self.data_read_io.read() - - # write "link file" with timeseries.data that is an external link to the timeseries in "data file" - # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" - with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: - link_file = NWBFile( - session_description='a test file', - identifier='link_file', - session_start_time=self.start_time - ) - self.link_container = TimeSeries( - name='test_mod_ts', - unit='V', - data=data_file_obt.get_acquisition('data_ts'), # link by setting data to an external TimeSeries - timestamps=data_file_obt.get_acquisition('data_ts'), # link by setting to an external TimeSeries - ) - link_file.add_acquisition(self.link_container) - link_write_io.write(link_file) + self.data_read_io = HDF5IO(self.data_filename, 'r', manager=get_manager()) + self.read_data_nwbfile = self.data_read_io.read() + self.read_data_container = self.read_data_nwbfile.get_acquisition('data_ts') + + # write "link file" with timeseries.data that is an external link to the timeseries in "data file" + # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" + with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: + link_file = NWBFile( + session_description='a test file', + identifier='link_file', + session_start_time=self.start_time + ) + self.link_container = TimeSeries( + name='test_mod_ts', + unit='V', + data=self.read_data_container.data, # <--- This is the main difference to TestTimeSeriesModular + timestamps=self.read_data_container.timestamps # <--- This is the main difference to TestTimeSeriesModular + ) + link_file.add_acquisition(self.link_container) + link_write_io.write(link_file) # note that self.link_container contains a link to a dataset that is now closed # read the link file self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager()) - self.read_nwbfile = self.link_read_io.read() - return self.getContainer(self.read_nwbfile) + self.read_link_nwbfile = self.link_read_io.read() + return self.getContainer(self.read_link_nwbfile) From 4fc3943810b418389cb963d79966c7c429bd9e4f Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Thu, 24 Aug 2023 14:35:47 -0700 Subject: [PATCH 09/12] Update test to ensure source file remains open when creating the link --- .../integration/hdf5/test_modular_storage.py | 185 +++++++++++------- 1 file changed, 109 insertions(+), 76 deletions(-) diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index d16f4899e..c510f4d7a 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -14,29 +14,35 @@ class TestTimeSeriesModular(TestCase): def setUp(self): - self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc()) + # File paths + self.data_filename = os.path.join(os.getcwd(), 'test_time_series_modular_data.nwb') + self.link_filename = os.path.join(os.getcwd(), 'test_time_series_modular_link.nwb') + # Make the data container file write + self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc()) self.data = np.arange(2000).reshape((1000, 2)) self.timestamps = np.linspace(0, 1, 1000) - + # The container before roundtrip self.container = TimeSeries( name='data_ts', unit='V', data=self.data, timestamps=self.timestamps ) + self.data_read_io = None # HDF5IO used for reading the main data file + self.read_data_nwbfile = None # The NWBFile read after roundtrip + self.read_data_container = None # self.container after rountrip - self.data_filename = os.path.join(os.getcwd(), 'test_time_series_modular_data.nwb') - self.link_filename = os.path.join(os.getcwd(), 'test_time_series_modular_link.nwb') - - self.read_container = None - self.link_read_io = None - self.data_read_io = None + # Variables for the second file which links the main data file + self.link_container = None # The container with the links before write + self.read_link_container = None # The container with the links after roundtrip + self.read_link_nwbfile = None # The NWBFile container containing link_container after roundtrip + self.link_read_io = None # HDF5IO use for reading the read_link_nwbfile def tearDown(self): - if self.read_container: - self.read_container.data.file.close() - self.read_container.timestamps.file.close() + if self.read_link_container: + self.read_link_container.data.file.close() + self.read_link_container.timestamps.file.close() if self.link_read_io: self.link_read_io.close() if self.data_read_io: @@ -64,52 +70,83 @@ def roundtripContainer(self): data_write_io.write(data_file) # read data file - with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io: - data_file_obt = self.data_read_io.read() - - # write "link file" with timeseries.data that is an external link to the timeseries in "data file" - # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" - with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: - link_file = NWBFile( - session_description='a test file', - identifier='link_file', - session_start_time=self.start_time - ) - self.link_container = TimeSeries( - name='test_mod_ts', - unit='V', - data=H5DataIO( - data=data_file_obt.get_acquisition('data_ts').data, - link_data=True # test with setting link data - ), - timestamps=H5DataIO( - data=data_file_obt.get_acquisition('data_ts').timestamps, - link_data=True # test with setting link data - ) - ) - link_file.add_acquisition(self.link_container) - link_write_io.write(link_file) + self.data_read_io = HDF5IO(self.data_filename, 'r', manager=get_manager()) + self.read_data_nwbfile = self.data_read_io.read() + self.read_data_container = self.read_data_nwbfile.get_acquisition('data_ts') - # note that self.link_container contains a link to a dataset that is now closed + # write "link file" with timeseries.data that is an external link to the timeseries in "data file" + # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" + with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: + link_file = NWBFile( + session_description='a test file', + identifier='link_file', + session_start_time=self.start_time + ) + self.link_container = TimeSeries( + name='test_mod_ts', + unit='V', + data=H5DataIO( + data=self.read_data_container.data, + link_data=True # test with setting link data + ), + timestamps=H5DataIO( + data=self.read_data_container.timestamps, + link_data=True # test with setting link data + ) + ) + link_file.add_acquisition(self.link_container) + link_write_io.write(link_file) # read the link file self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager()) - self.read_nwbfile = self.link_read_io.read() - return self.getContainer(self.read_nwbfile) + self.read_link_nwbfile = self.link_read_io.read() + return self.getContainer(self.read_link_nwbfile) def test_roundtrip(self): - self.read_container = self.roundtripContainer() - - # make sure we get a completely new object - self.assertIsNotNone(str(self.container)) # added as a test to make sure printing works + # Roundtrip the container + self.read_link_container = self.roundtripContainer() + + # 1. Make sure our containers are set correctly for the test + # 1.1: Make sure the container we read is not identical to the container we used for writing + self.assertNotEqual(id(self.link_container), id(self.read_link_container)) + self.assertNotEqual(id(self.container), id(self.read_data_container)) + # 1.2: Make sure the container we read is indeed the correct container we should use for testing + self.assertIs(self.read_link_nwbfile.objects[self.link_container.object_id], self.read_link_container) + self.assertIs(self.read_data_nwbfile.objects[self.container.object_id], self.read_data_container) + # 1.3: Make sure the object_ids of the container we wrote and read are the same + self.assertEqual(self.read_link_container.object_id, self.link_container.object_id) + self.assertEqual(self.read_data_container.object_id, self.container.object_id) + # 1.4: Make sure the object_ids between the source data and link data container are different + self.assertNotEqual(self.read_link_container.object_id, self.read_data_container.object_id) + + # Test that printing works for the source data and linked data container + self.assertIsNotNone(str(self.container)) + self.assertIsNotNone(str(self.read_data_container)) self.assertIsNotNone(str(self.link_container)) - self.assertIsNotNone(str(self.read_container)) - self.assertFalse(self.link_container.timestamps.valid) - self.assertTrue(self.read_container.timestamps.id.valid) - self.assertNotEqual(id(self.link_container), id(self.read_container)) - self.assertIs(self.read_nwbfile.objects[self.link_container.object_id], self.read_container) - self.assertContainerEqual(self.read_container, self.container, ignore_name=True, ignore_hdmf_attrs=True) - self.assertEqual(self.read_container.object_id, self.link_container.object_id) + self.assertIsNotNone(str(self.read_link_container)) + + # Test that timestamps and data are valid after write + self.assertTrue(self.read_link_container.timestamps.id.valid) + self.assertTrue(self.read_link_container.data.id.valid) + self.assertTrue(self.read_data_container.timestamps.id.valid) + self.assertTrue(self.read_data_container.data.id.valid) + + # Make sure the data in the read data container and linked data container match the original container + self.assertContainerEqual(self.read_link_container, self.container, ignore_name=True, ignore_hdmf_attrs=True) + self.assertContainerEqual(self.read_data_container, self.container, ignore_name=True, ignore_hdmf_attrs=True) + + # Make sure the timestamps and data are linked correctly. I.e., the filename of the h5py dataset should + # match between the data file and the file with links even-though they have been read from different files + self.assertEqual( + self.read_data_container.data.file.filename, # Path where the source data is stored + self.read_link_container.data.file.filename # Path where the linked h5py dataset points to + ) + self.assertEqual( + self.read_data_container.timestamps.file.filename, # Path where the source data is stored + self.read_link_container.timestamps.file.filename # Path where the linked h5py dataset points to + ) + + # validate both the source data and linked data file via the pynwb validator self.validate() def test_link_root(self): @@ -170,11 +207,6 @@ class TestTimeSeriesModularLinkViaTimeSeries(TestTimeSeriesModular): and TimeSeries.timestamps to the other TimeSeries on construction, rather than using H5DataIO. """ - def setUp(self): - super().setUp() - self.skipTest("This behavior is currently broken. " - "See issue https://github.com/NeurodataWithoutBorders/pynwb/issues/1767") - def roundtripContainer(self): # create and write data file data_file = NWBFile( @@ -188,29 +220,30 @@ def roundtripContainer(self): data_write_io.write(data_file) # read data file - with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io: - data_file_obt = self.data_read_io.read() - - # write "link file" with timeseries.data that is an external link to the timeseries in "data file" - # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" - with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: - link_file = NWBFile( - session_description='a test file', - identifier='link_file', - session_start_time=self.start_time - ) - self.link_container = TimeSeries( - name='test_mod_ts', - unit='V', - data=data_file_obt.get_acquisition('data_ts'), # link by setting data to an external TimeSeries - timestamps=data_file_obt.get_acquisition('data_ts'), # link by setting to an external TimeSeries - ) - link_file.add_acquisition(self.link_container) - link_write_io.write(link_file) + self.data_read_io = HDF5IO(self.data_filename, 'r', manager=get_manager()) + self.read_data_nwbfile = self.data_read_io.read() + self.read_data_container = self.read_data_nwbfile.get_acquisition('data_ts') + + # write "link file" with timeseries.data that is an external link to the timeseries in "data file" + # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" + with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: + link_file = NWBFile( + session_description='a test file', + identifier='link_file', + session_start_time=self.start_time + ) + self.link_container = TimeSeries( + name='test_mod_ts', + unit='V', + data=self.read_data_container.data, # <--- This is the main difference to TestTimeSeriesModular + timestamps=self.read_data_container.timestamps # <--- This is the main difference to TestTimeSeriesModular + ) + link_file.add_acquisition(self.link_container) + link_write_io.write(link_file) # note that self.link_container contains a link to a dataset that is now closed # read the link file self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager()) - self.read_nwbfile = self.link_read_io.read() - return self.getContainer(self.read_nwbfile) + self.read_link_nwbfile = self.link_read_io.read() + return self.getContainer(self.read_link_nwbfile) From e39046e30ca5bd3a8387de855ad2b57a03782fb5 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Thu, 24 Aug 2023 15:01:57 -0700 Subject: [PATCH 10/12] Redisable problematic test --- tests/integration/hdf5/test_modular_storage.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index c510f4d7a..d627ce603 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -207,6 +207,10 @@ class TestTimeSeriesModularLinkViaTimeSeries(TestTimeSeriesModular): and TimeSeries.timestamps to the other TimeSeries on construction, rather than using H5DataIO. """ + def setUp(self): + super().setUp() + self.skipTest("This behavior is currently broken. See issue .") + def roundtripContainer(self): # create and write data file data_file = NWBFile( @@ -235,8 +239,8 @@ def roundtripContainer(self): self.link_container = TimeSeries( name='test_mod_ts', unit='V', - data=self.read_data_container.data, # <--- This is the main difference to TestTimeSeriesModular - timestamps=self.read_data_container.timestamps # <--- This is the main difference to TestTimeSeriesModular + data=self.read_data_container, # <--- This is the main difference to TestTimeSeriesModular + timestamps=self.read_data_container # <--- This is the main difference to TestTimeSeriesModular ) link_file.add_acquisition(self.link_container) link_write_io.write(link_file) From 36000795fe46287cec7407f49fd53719cb47a6fc Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Thu, 24 Aug 2023 15:18:50 -0700 Subject: [PATCH 11/12] Fix codespell --- tests/integration/hdf5/test_modular_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index d627ce603..fba5d02db 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -31,7 +31,7 @@ def setUp(self): ) self.data_read_io = None # HDF5IO used for reading the main data file self.read_data_nwbfile = None # The NWBFile read after roundtrip - self.read_data_container = None # self.container after rountrip + self.read_data_container = None # self.container after roundtrip # Variables for the second file which links the main data file self.link_container = None # The container with the links before write From 79cb3c06bdd462c125a024f294335748ce02b133 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 7 Feb 2024 08:06:57 -0800 Subject: [PATCH 12/12] Update test_base.py --- tests/integration/hdf5/test_base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/hdf5/test_base.py b/tests/integration/hdf5/test_base.py index c27704c99..60f8510ff 100644 --- a/tests/integration/hdf5/test_base.py +++ b/tests/integration/hdf5/test_base.py @@ -50,18 +50,22 @@ def test_data_linking(self): ''' Test that data get linked to in TimeSeries ''' tsa = TimeSeries(name='a', data=np.linspace(0, 1, 1000), timestamps=np.arange(1000.), unit='m') tsb = TimeSeries(name='b', data=tsa, timestamps=np.arange(1000.), unit='m') + tsc = TimeSeries(name='c', data=tsb, timestamps=np.arange(1000.), unit='m') nwbfile = NWBFile(identifier='foo', session_start_time=datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()), session_description='bar') nwbfile.add_acquisition(tsa) nwbfile.add_acquisition(tsb) + nwbfile.add_acquisition(tsc) with NWBHDF5IO(self.path, 'w') as io: io.write(nwbfile) with NWBHDF5IO(self.path, 'r') as io: nwbfile = io.read() tsa = nwbfile.acquisition['a'] tsb = nwbfile.acquisition['b'] + tsc = nwbfile.acquisition['c'] self.assertIs(tsa.data, tsb.data) + self.assertIs(tsa.data, tsc.data) class TestImagesIO(AcquisitionH5IOMixin, TestCase):