diff --git a/CHANGELOG.md b/CHANGELOG.md index 8129a82e9..13be33c6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ ### Bug fixes - Fix bug where namespaces were loaded in "w-" mode. @h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795) - Fix bug where pynwb version was reported as "unknown" to readthedocs @stephprince [#1810](https://github.com/NeurodataWithoutBorders/pynwb/pull/1810) +- Fixed bug to allow linking of `TimeSeries.data` by setting the `data` constructor argument to another `TimeSeries`. @oruebel [#1766](https://github.com/NeurodataWithoutBorders/pynwb/pull/1766) ### Documentation and tutorial enhancements - Add RemFile to streaming tutorial. @bendichter [#1761](https://github.com/NeurodataWithoutBorders/pynwb/pull/1761) @@ -26,8 +27,8 @@ ## PyNWB 2.5.0 (August 18, 2023) ### Enhancements and minor changes -- Add `TimeSeries.get_timestamps()`. @bendichter [#1741](https://github.com/NeurodataWithoutBorders/pynwb/pull/1741) -- Add `TimeSeries.get_data_in_units()`. @bendichter [#1745](https://github.com/NeurodataWithoutBorders/pynwb/pull/1745) +- Added `TimeSeries.get_timestamps()`. @bendichter [#1741](https://github.com/NeurodataWithoutBorders/pynwb/pull/1741) +- Added `TimeSeries.get_data_in_units()`. @bendichter [#1745](https://github.com/NeurodataWithoutBorders/pynwb/pull/1745) - Updated `ExternalResources` name change to `HERD`, along with HDMF 3.9.0 being the new minimum. @mavaylon1 [#1754](https://github.com/NeurodataWithoutBorders/pynwb/pull/1754) ### Documentation and tutorial enhancements @@ -40,15 +41,15 @@ ## PyNWB 2.4.0 (July 23, 2023) ### Enhancements and minor changes -- Add support for `ExternalResources`. @mavaylon1 [#1684](https://github.com/NeurodataWithoutBorders/pynwb/pull/1684) -- Update links for making a release. @mavaylon1 [#1720](https://github.com/NeurodataWithoutBorders/pynwb/pull/1720) +- Added support for `ExternalResources`. 
@mavaylon1 [#1684](https://github.com/NeurodataWithoutBorders/pynwb/pull/1684) +- Updated links for making a release. @mavaylon1 [#1720](https://github.com/NeurodataWithoutBorders/pynwb/pull/1720) ### Bug fixes - Fixed sphinx-gallery setting to correctly display index in the docs with sphinx-gallery>=0.11. @oruebel [#1733](https://github.com/NeurodataWithoutBorders/pynwb/pull/1733) ### Documentation and tutorial enhancements - Added thumbnail for Optogentics tutorial. @oruebel [#1729](https://github.com/NeurodataWithoutBorders/pynwb/pull/1729) -- Update and fix errors in tutorials. @bendichter @oruebel +- Updated and fixed errors in tutorials. @bendichter @oruebel ## PyNWB 2.3.3 (June 26, 2023) diff --git a/src/pynwb/io/base.py b/src/pynwb/io/base.py index 5b5aac48b..db9c259ef 100644 --- a/src/pynwb/io/base.py +++ b/src/pynwb/io/base.py @@ -73,6 +73,19 @@ def timestamps_carg(self, builder, manager): else: return tstamps_builder.data + @NWBContainerMapper.object_attr("data") + def data_attr(self, container, manager): + ret = container.fields.get('data') + if isinstance(ret, TimeSeries): + owner = ret + curr = owner.fields.get('data') + while isinstance(curr, TimeSeries): + owner = curr + curr = owner.fields.get('data') + data_builder = manager.build(owner) + ret = LinkBuilder(data_builder['data'], 'data') + return ret + @NWBContainerMapper.constructor_arg("data") def data_carg(self, builder, manager): # handle case where a TimeSeries is read and missing data @@ -105,7 +118,10 @@ def unit_carg(self, builder, manager): data_builder = manager.construct(target.parent) else: data_builder = target - unit_value = data_builder.attributes.get('unit') + if isinstance(data_builder, TimeSeries): # Data linked in another timeseries + unit_value = data_builder.unit + else: # DatasetBuilder owned by this timeseries + unit_value = data_builder.attributes.get('unit') if unit_value is None: return timeseries_cls.DEFAULT_UNIT return unit_value diff --git 
a/tests/integration/hdf5/test_base.py b/tests/integration/hdf5/test_base.py index 1e855f3ce..60f8510ff 100644 --- a/tests/integration/hdf5/test_base.py +++ b/tests/integration/hdf5/test_base.py @@ -46,6 +46,27 @@ def test_timestamps_linking(self): tsb = nwbfile.acquisition['b'] self.assertIs(tsa.timestamps, tsb.timestamps) + def test_data_linking(self): + ''' Test that data get linked to in TimeSeries ''' + tsa = TimeSeries(name='a', data=np.linspace(0, 1, 1000), timestamps=np.arange(1000.), unit='m') + tsb = TimeSeries(name='b', data=tsa, timestamps=np.arange(1000.), unit='m') + tsc = TimeSeries(name='c', data=tsb, timestamps=np.arange(1000.), unit='m') + nwbfile = NWBFile(identifier='foo', + session_start_time=datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()), + session_description='bar') + nwbfile.add_acquisition(tsa) + nwbfile.add_acquisition(tsb) + nwbfile.add_acquisition(tsc) + with NWBHDF5IO(self.path, 'w') as io: + io.write(nwbfile) + with NWBHDF5IO(self.path, 'r') as io: + nwbfile = io.read() + tsa = nwbfile.acquisition['a'] + tsb = nwbfile.acquisition['b'] + tsc = nwbfile.acquisition['c'] + self.assertIs(tsa.data, tsb.data) + self.assertIs(tsa.data, tsc.data) + class TestImagesIO(AcquisitionH5IOMixin, TestCase): diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index 6c86fc615..fba5d02db 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -14,29 +14,35 @@ class TestTimeSeriesModular(TestCase): def setUp(self): - self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc()) + # File paths + self.data_filename = os.path.join(os.getcwd(), 'test_time_series_modular_data.nwb') + self.link_filename = os.path.join(os.getcwd(), 'test_time_series_modular_link.nwb') + # Make the data container file write + self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc()) self.data = np.arange(2000).reshape((1000, 2)) self.timestamps = np.linspace(0, 1, 
1000) - + # The container before roundtrip self.container = TimeSeries( name='data_ts', unit='V', data=self.data, timestamps=self.timestamps ) + self.data_read_io = None # HDF5IO used for reading the main data file + self.read_data_nwbfile = None # The NWBFile read after roundtrip + self.read_data_container = None # self.container after roundtrip - self.data_filename = os.path.join(os.getcwd(), 'test_time_series_modular_data.nwb') - self.link_filename = os.path.join(os.getcwd(), 'test_time_series_modular_link.nwb') - - self.read_container = None - self.link_read_io = None - self.data_read_io = None + # Variables for the second file which links the main data file + self.link_container = None # The container with the links before write + self.read_link_container = None # The container with the links after roundtrip + self.read_link_nwbfile = None # The NWBFile container containing link_container after roundtrip + self.link_read_io = None # HDF5IO use for reading the read_link_nwbfile def tearDown(self): - if self.read_container: - self.read_container.data.file.close() - self.read_container.timestamps.file.close() + if self.read_link_container: + self.read_link_container.data.file.close() + self.read_link_container.timestamps.file.close() if self.link_read_io: self.link_read_io.close() if self.data_read_io: @@ -64,49 +70,83 @@ def roundtripContainer(self): data_write_io.write(data_file) # read data file - with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io: - data_file_obt = self.data_read_io.read() - - # write "link file" with timeseries.data that is an external link to the timeseries in "data file" - # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" - with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: - link_file = NWBFile( - session_description='a test file', - identifier='link_file', - session_start_time=self.start_time - ) - self.link_container = TimeSeries( - 
name='test_mod_ts', - unit='V', - data=data_file_obt.get_acquisition('data_ts'), # test direct link - timestamps=H5DataIO( - data=data_file_obt.get_acquisition('data_ts').timestamps, - link_data=True # test with setting link data - ) - ) - link_file.add_acquisition(self.link_container) - link_write_io.write(link_file) + self.data_read_io = HDF5IO(self.data_filename, 'r', manager=get_manager()) + self.read_data_nwbfile = self.data_read_io.read() + self.read_data_container = self.read_data_nwbfile.get_acquisition('data_ts') - # note that self.link_container contains a link to a dataset that is now closed + # write "link file" with timeseries.data that is an external link to the timeseries in "data file" + # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" + with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: + link_file = NWBFile( + session_description='a test file', + identifier='link_file', + session_start_time=self.start_time + ) + self.link_container = TimeSeries( + name='test_mod_ts', + unit='V', + data=H5DataIO( + data=self.read_data_container.data, + link_data=True # test with setting link data + ), + timestamps=H5DataIO( + data=self.read_data_container.timestamps, + link_data=True # test with setting link data + ) + ) + link_file.add_acquisition(self.link_container) + link_write_io.write(link_file) # read the link file self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager()) - self.read_nwbfile = self.link_read_io.read() - return self.getContainer(self.read_nwbfile) + self.read_link_nwbfile = self.link_read_io.read() + return self.getContainer(self.read_link_nwbfile) def test_roundtrip(self): - self.read_container = self.roundtripContainer() - - # make sure we get a completely new object - self.assertIsNotNone(str(self.container)) # added as a test to make sure printing works + # Roundtrip the container + self.read_link_container = self.roundtripContainer() + + # 1. 
Make sure our containers are set correctly for the test + # 1.1: Make sure the container we read is not identical to the container we used for writing + self.assertNotEqual(id(self.link_container), id(self.read_link_container)) + self.assertNotEqual(id(self.container), id(self.read_data_container)) + # 1.2: Make sure the container we read is indeed the correct container we should use for testing + self.assertIs(self.read_link_nwbfile.objects[self.link_container.object_id], self.read_link_container) + self.assertIs(self.read_data_nwbfile.objects[self.container.object_id], self.read_data_container) + # 1.3: Make sure the object_ids of the container we wrote and read are the same + self.assertEqual(self.read_link_container.object_id, self.link_container.object_id) + self.assertEqual(self.read_data_container.object_id, self.container.object_id) + # 1.4: Make sure the object_ids between the source data and link data container are different + self.assertNotEqual(self.read_link_container.object_id, self.read_data_container.object_id) + + # Test that printing works for the source data and linked data container + self.assertIsNotNone(str(self.container)) + self.assertIsNotNone(str(self.read_data_container)) self.assertIsNotNone(str(self.link_container)) - self.assertIsNotNone(str(self.read_container)) - self.assertFalse(self.link_container.timestamps.valid) - self.assertTrue(self.read_container.timestamps.id.valid) - self.assertNotEqual(id(self.link_container), id(self.read_container)) - self.assertIs(self.read_nwbfile.objects[self.link_container.object_id], self.read_container) - self.assertContainerEqual(self.read_container, self.container, ignore_name=True, ignore_hdmf_attrs=True) - self.assertEqual(self.read_container.object_id, self.link_container.object_id) + self.assertIsNotNone(str(self.read_link_container)) + + # Test that timestamps and data are valid after write + self.assertTrue(self.read_link_container.timestamps.id.valid) + 
self.assertTrue(self.read_link_container.data.id.valid) + self.assertTrue(self.read_data_container.timestamps.id.valid) + self.assertTrue(self.read_data_container.data.id.valid) + + # Make sure the data in the read data container and linked data container match the original container + self.assertContainerEqual(self.read_link_container, self.container, ignore_name=True, ignore_hdmf_attrs=True) + self.assertContainerEqual(self.read_data_container, self.container, ignore_name=True, ignore_hdmf_attrs=True) + + # Make sure the timestamps and data are linked correctly. I.e., the filename of the h5py dataset should + # match between the data file and the file with links even though they have been read from different files + self.assertEqual( + self.read_data_container.data.file.filename, # Path where the source data is stored + self.read_link_container.data.file.filename # Path where the linked h5py dataset points to + ) + self.assertEqual( + self.read_data_container.timestamps.file.filename, # Path where the source data is stored + self.read_link_container.timestamps.file.filename # Path where the linked h5py dataset points to + ) + + # validate both the source data and linked data file via the pynwb validator self.validate() def test_link_root(self): @@ -159,3 +199,55 @@ def validate(self): return nwbfile.get_acquisition('test_mod_ts') + + +class TestTimeSeriesModularLinkViaTimeSeries(TestTimeSeriesModular): + """ + Same as TestTimeSeriesModular but creating links by setting TimeSeries.data + and TimeSeries.timestamps to the other TimeSeries on construction, rather than + using H5DataIO. + """ + def setUp(self): + super().setUp() + self.skipTest("This behavior is currently broken. 
See the related issue in the pynwb issue tracker.") + + def roundtripContainer(self): + # create and write data file + data_file = NWBFile( + session_description='a test file', + identifier='data_file', + session_start_time=self.start_time + ) + data_file.add_acquisition(self.container) + + with HDF5IO(self.data_filename, 'w', manager=get_manager()) as data_write_io: + data_write_io.write(data_file) + + # read data file + self.data_read_io = HDF5IO(self.data_filename, 'r', manager=get_manager()) + self.read_data_nwbfile = self.data_read_io.read() + self.read_data_container = self.read_data_nwbfile.get_acquisition('data_ts') + + # write "link file" with timeseries.data that is an external link to the timeseries in "data file" + # also link timeseries.timestamps.data to the timeseries.timestamps in "data file" + with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: + link_file = NWBFile( + session_description='a test file', + identifier='link_file', + session_start_time=self.start_time + ) + self.link_container = TimeSeries( + name='test_mod_ts', + unit='V', + data=self.read_data_container, # <--- This is the main difference to TestTimeSeriesModular + timestamps=self.read_data_container # <--- This is the main difference to TestTimeSeriesModular + ) + link_file.add_acquisition(self.link_container) + link_write_io.write(link_file) + + # note that self.link_container contains a link to a dataset that is now closed + + # read the link file + self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager()) + self.read_link_nwbfile = self.link_read_io.read() + return self.getContainer(self.read_link_nwbfile)