Update iterative write and parallel I/O tutorial #1633

Merged: 11 commits, Jan 11, 2023
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -23,6 +23,8 @@
[#1591](https://github.com/NeurodataWithoutBorders/pynwb/pull/1591)
- Updated citation for PyNWB in docs and duecredit to use the eLife NWB paper. @oruebel [#1604](https://github.com/NeurodataWithoutBorders/pynwb/pull/1604)
- Fixed docs build warnings due to use of hardcoded links. @oruebel [#1604](https://github.com/NeurodataWithoutBorders/pynwb/pull/1604)
- Updated the [iterative write tutorial](https://pynwb.readthedocs.io/en/stable/tutorials/advanced_io/iterative_write.html) to reference the new ``GenericDataChunkIterator`` functionality and use the new ``H5DataIO.dataset`` property to simplify the custom I/O section. @oruebel [#1633](https://github.com/NeurodataWithoutBorders/pynwb/pull/1633)
- Updated the [parallel I/O tutorial](https://pynwb.readthedocs.io/en/stable/tutorials/advanced_io/parallelio.html) to use the new ``H5DataIO.dataset`` feature to set up an empty dataset for parallel write. @oruebel [#1633](https://github.com/NeurodataWithoutBorders/pynwb/pull/1633)

### Bug fixes
- Added shape constraint to `PatchClampSeries.data`. @bendichter
3 changes: 2 additions & 1 deletion docs/Makefile
@@ -9,6 +9,7 @@ PAPER =
BUILDDIR = _build
SRCDIR = ../src
RSTDIR = source
GALLERYDIR = gallery
PKGNAME = pynwb

# Internal variables.
@@ -45,7 +46,7 @@ help:
@echo " apidoc to build RST from source code"

clean:
-rm -rf $(BUILDDIR)/* $(RSTDIR)/$(PKGNAME)*.rst $(RSTDIR)/tutorials
-rm -rf $(BUILDDIR)/* $(RSTDIR)/$(PKGNAME)*.rst $(RSTDIR)/tutorials $(GALLERYDIR)/advanced_io/*.npy $(GALLERYDIR)/advanced_io/*.nwb

html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
29 changes: 8 additions & 21 deletions docs/gallery/advanced_io/parallelio.py
@@ -30,7 +30,7 @@
# from dateutil import tz
# from pynwb import NWBHDF5IO, NWBFile, TimeSeries
# from datetime import datetime
# from hdmf.data_utils import DataChunkIterator
# from hdmf.backends.hdf5.h5_utils import H5DataIO
#
# start_time = datetime(2018, 4, 25, 2, 30, 3, tzinfo=tz.gettz('US/Pacific'))
# fname = 'test_parallel_pynwb.nwb'
@@ -40,9 +40,11 @@
# # write in parallel but we do not write any data
# if rank == 0:
# nwbfile = NWBFile('aa', 'aa', start_time)
# data = DataChunkIterator(data=None, maxshape=(4,), dtype=np.dtype('int'))
# data = H5DataIO(shape=(4,),
# maxshape=(4,),
# dtype=np.dtype('int'))
#
# nwbfile.add_acquisition(TimeSeries('ts_name', description='desc', data=data,
# nwbfile.add_acquisition(TimeSeries(name='ts_name', description='desc', data=data,
# rate=100., unit='m'))
# with NWBHDF5IO(fname, 'w') as io:
# io.write(nwbfile)
@@ -58,24 +60,9 @@
# print(io.read().acquisition['ts_name'].data[rank])
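
####################
# For completeness, a minimal sketch of the parallel append step implied between the rank-0
# setup and the final read shown above. This assumes the imports and ``fname`` defined earlier
# in this commented-out example, and that :py:class:`~pynwb.NWBHDF5IO` accepts an MPI
# communicator via its ``comm`` argument:
#
# .. code-block:: python
#
#    from mpi4py import MPI
#
#    comm = MPI.COMM_WORLD
#    rank = comm.Get_rank()
#
#    # every rank opens the same file and writes only its own element of the dataset
#    with NWBHDF5IO(fname, 'a', comm=comm) as io:
#        nwbfile = io.read()
#        nwbfile.acquisition['ts_name'].data[rank] = rank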

####################
# To specify details about chunking, compression and other HDF5-specific I/O options,
# we can wrap data via ``H5DataIO``, e.g.,
#
# .. code-block:: python
#
# data = H5DataIO(DataChunkIterator(data=None, maxshape=(100000, 100),
# dtype=np.dtype('float')),
# chunks=(10, 10), maxshape=(None, None))
# .. note::
#
# would initialize your dataset with a shape of (100000, 100) and maxshape of (None, None)
# and your own custom chunking of (10, 10).
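
####################
# For illustration, a hedged sketch of using ``H5DataIO`` to control the HDF5 layout of a
# dataset directly (the chunk shape and compression settings below are example values only,
# not recommendations):
#
# .. code-block:: python
#
#    data = H5DataIO(data=np.zeros(shape=(100000, 100), dtype='float'),
#                    chunks=(10, 10),        # custom chunk shape
#                    compression='gzip',     # enable GZIP compression
#                    compression_opts=4)     # GZIP compression level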

####################
# Disclaimer
# ----------------
# Using :py:class:`hdmf.backends.hdf5.h5_utils.H5DataIO` we can also specify further
# details about the data layout, e.g., via the chunking and compression parameters.
#
# External links included in the tutorial are being provided as a convenience and for informational purposes only;
# they do not constitute an endorsement or an approval by the authors of any of the products, services or opinions of
# the corporation or organization or individual. The authors bear no responsibility for the accuracy, legality or
# content of the external site or for that of subsequent links. Contact the external site for answers to questions
# regarding its content.
docs/gallery/advanced_io/iterative_write.py
@@ -42,6 +42,7 @@
# * **Data generators** Data generators are in many ways similar to data streams, except that the
# data is typically generated locally and programmatically rather than coming from an external
# data source.
#
# * **Sparse data arrays** With sparse arrays, the data array (e.g., a matrix) may be large while
# only a few values are actually set. To avoid the storage overhead of storing the full array we
# can employ (in HDF5) a combination of chunking, compression, and
@@ -71,6 +72,13 @@
# This is useful for buffered I/O operations, e.g., to improve performance by accumulating data in memory and
# writing larger blocks at once.
#
# * :py:class:`~hdmf.data_utils.GenericDataChunkIterator` is a semi-abstract version of an
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` that automatically handles the selection of
# buffer regions and resolves communication of compatible chunk regions. Users specify chunk
# and buffer shapes or sizes and the iterator will manage how to break the data up for write
# (a rough subclass sketch follows below). For further details, see the
# :hdmf-docs:`GenericDataChunkIterator tutorial <tutorials/plot_generic_data_chunk_tutorial.html>`.
#
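# As a rough sketch of that pattern (method names follow the HDMF API as we understand it;
# see the linked HDMF tutorial for the authoritative version), a subclass only has to say
# how to read a selection and report the overall shape and dtype:
#
# .. code-block:: python
#
#    from hdmf.data_utils import GenericDataChunkIterator
#
#    class NumpyArrayChunkIterator(GenericDataChunkIterator):
#        """Illustrative iterator over an in-memory numpy array."""
#
#        def __init__(self, array, **kwargs):
#            self.array = array
#            super().__init__(**kwargs)
#
#        def _get_data(self, selection):
#            return self.array[selection]
#
#        def _get_maxshape(self):
#            return self.array.shape
#
#        def _get_dtype(self):
#            return self.array.dtype
#
#    data = NumpyArrayChunkIterator(np.random.rand(1000, 10), buffer_gb=0.1)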

####################
# Iterative Data Write: API
@@ -107,11 +115,15 @@
from pynwb import NWBHDF5IO


def write_test_file(filename, data):
def write_test_file(filename, data, close_io=True):
"""

Simple helper function to write an NWBFile with a single timeseries containing data
:param filename: String with the name of the output file
:param data: The data of the timeseries
:param close_io: Close and destroy the NWBHDF5IO object used for writing (default=True)

:returns: None if close_io==True otherwise return NWBHDF5IO object used for write
"""

# Create a test NWBfile
@@ -133,7 +145,11 @@ def write_test_file(filename, data):
# Write the data to file
io = NWBHDF5IO(filename, 'w')
io.write(nwbfile)
io.close()
if close_io:
io.close()
del io
io = None
return io
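
####################
# For example (an illustrative usage, not part of the original tutorial flow), the helper can
# either close the file for us or hand back the open :py:class:`~pynwb.NWBHDF5IO` object:
#
# .. code-block:: python
#
#    write_test_file('example_closed.nwb', data=np.arange(10.))      # file is closed for us
#    io = write_test_file('example_open.nwb', data=np.arange(10.), close_io=False)
#    io.close()                                                       # we close it ourselves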


####################
@@ -196,12 +212,6 @@ def iter_sin(chunk_length=10, max_chunks=100):
str(data.dtype)))

####################
# ``[Out]:``
#
# .. code-block:: python
#
# maxshape=(None, 10), recommended_data_shape=(1, 10), dtype=float64
#
# As we can see, :py:class:`~hdmf.data_utils.DataChunkIterator` automatically recommends
# in its ``maxshape`` that the first dimension of our array should be unlimited (``None``) and the second
# dimension should be ``10`` (i.e., the length of our chunk). Since :py:class:`~hdmf.data_utils.DataChunkIterator`
@@ -216,8 +226,11 @@ def iter_sin(chunk_length=10, max_chunks=100):
# :py:class:`~hdmf.data_utils.DataChunkIterator` assumes that our generator yields, in **consecutive order**,
# a **single** complete element along the **first dimension** of our array (i.e., it iterates over the first
# axis and yields one element at a time). This behavior is useful in many practical cases. However, if
# this strategy does not match our needs, then you can alternatively implement our own derived
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`. We show an example of this next.
# this strategy does not match our needs, then using :py:class:`~hdmf.data_utils.GenericDataChunkIterator`
# or implementing your own derived :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` may be more
# appropriate. We show an example of how to implement your own :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`
# next. See the :hdmf-docs:`GenericDataChunkIterator tutorial <tutorials/plot_generic_data_chunk_tutorial.html>` as
# part of the HDMF documentation for details on how to use :py:class:`~hdmf.data_utils.GenericDataChunkIterator`.
#


@@ -387,26 +400,6 @@ def maxshape(self):
print(" Reduction : %.2f x" % (expected_size / file_size_largechunks_compressed))

####################
# ``[Out]:``
#
# .. code-block:: python
#
# 1) Sparse Matrix Size:
# Expected Size : 8000000.00 MB
# Occupied Size : 0.80000 MB
# 2) NWB HDF5 file (no compression):
# File Size : 0.89 MB
# Reduction : 9035219.28 x
# 3) NWB HDF5 file (with GZIP compression):
# File Size : 0.88847 MB
# Reduction : 9004283.79 x
# 4) NWB HDF5 file (large chunks):
# File Size : 80.08531 MB
# Reduction : 99893.47 x
# 5) NWB HDF5 file (large chunks with compression):
# File Size : 1.14671 MB
# Reduction : 6976450.12 x
#
# Discussion
# ^^^^^^^^^^
#
@@ -490,7 +483,7 @@ def maxshape(self):
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Note, here we use a generator for simplicity, but we could equally well implement our own
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` or use :py:class:`~hdmf.data_utils.GenericDataChunkIterator`.


def iter_largearray(filename, shape, dtype='float64'):
@@ -553,15 +546,6 @@ def iter_largearray(filename, shape, dtype='float64'):
else:
print("ERROR: Mismatch between data")


####################
# ``[Out]:``
#
# .. code-block:: python
#
# Success: All data values match


####################
# Example: Convert arrays stored in multiple files
# -----------------------------------------------------
@@ -705,46 +689,37 @@ def maxshape(self):
#
from hdmf.backends.hdf5.h5_utils import H5DataIO

write_test_file(filename='basic_alternative_custom_write.nwb',
data=H5DataIO(data=np.empty(shape=(0, 10), dtype='float'),
maxshape=(None, 10), # <-- Make the time dimension resizable
chunks=(131072, 2), # <-- Use 2MB chunks
compression='gzip', # <-- Enable GZip compression
compression_opts=4, # <-- GZip aggression
shuffle=True, # <-- Enable shuffle filter
fillvalue=np.nan # <-- Use NAN as fillvalue
)
)
# Use H5DataIO to specify how to setup the dataset in the file
dataio = H5DataIO(
shape=(0, 10), # Initial shape. If the shape is known then set to full shape
dtype=np.dtype('float'), # dtype of the dataset
maxshape=(None, 10), # Make the time dimension resizable
chunks=(131072, 2), # Use 2MB chunks
compression='gzip', # Enable GZip compression
compression_opts=4, # GZip aggression
shuffle=True, # Enable shuffle filter
fillvalue=np.nan # Use NAN as fillvalue
)

# Write a test NWB file with our dataset and keep the NWB file (i.e., the NWBHDF5IO object) open
io = write_test_file(
filename='basic_alternative_custom_write.nwb',
data=dataio,
close_io=False
)
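
####################
# After ``io.write(nwbfile)`` the dataset exists on disk, and the new ``H5DataIO.dataset``
# property exposes the underlying ``h5py.Dataset`` while the file handle returned by
# ``write_test_file`` is still open. A quick, hedged sanity check:

print(type(dataio.dataset))  # expected: an h5py Dataset object backed by the open file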

####################
# Step 2: Get the dataset(s) to be updated
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
from pynwb import NWBHDF5IO # noqa

io = NWBHDF5IO('basic_alternative_custom_write.nwb', mode='a')
nwbfile = io.read()
data = nwbfile.get_acquisition('synthetic_timeseries').data

# Let's check what the data looks like
print("Shape %s, Chunks: %s, Maxshape=%s" % (str(data.shape), str(data.chunks), str(data.maxshape)))

####################
# ``[Out]:``
#
# .. code-block:: python
#
# Shape (0, 10), Chunks: (131072, 2), Maxshape=(None, 10)
#

####################
# Step 3: Implement custom write
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Let's check what the data looks like before we write
print("Before write: Shape= %s, Chunks= %s, Maxshape=%s" %
(str(dataio.dataset.shape), str(dataio.dataset.chunks), str(dataio.dataset.maxshape)))

data.resize((8, 10))  # <-- Allocate the space we need
data[0:3, :] = 1 # <-- Write timesteps 0,1,2
data[3:6, :] = 2 # <-- Write timesteps 3,4,5, Note timesteps 6,7 are not being initialized
dataio.dataset.resize((8, 10)) # <-- Allocate space. Only needed if we didn't set the initial shape large enough
dataio.dataset[0:3, :] = 1 # <-- Write timesteps 0,1,2
dataio.dataset[3:6, :] = 2 # <-- Write timesteps 3,4,5, Note timesteps 6,7 are not being initialized
io.close() # <-- Close the file


@@ -756,20 +731,13 @@ def maxshape(self):

io = NWBHDF5IO('basic_alternative_custom_write.nwb', mode='a')
nwbfile = io.read()
data = nwbfile.get_acquisition('synthetic_timeseries').data
print(data[:])
dataset = nwbfile.get_acquisition('synthetic_timeseries').data
print("After write: Shape= %s, Chunks= %s, Maxshape=%s" %
(str(dataset.shape), str(dataset.chunks), str(dataset.maxshape)))
print(dataset[:])
io.close()

####################
# ``[Out]:``
#
# .. code-block:: python
#
# [[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
# [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
# [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
# [ 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
# [ 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
# [ 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
# [ nan nan nan nan nan nan nan nan nan nan]
# [ nan nan nan nan nan nan nan nan nan nan]]
# We allocated our data to be ``shape=(8, 10)`` but we only wrote data to the first 6 rows of the
# array. As expected, we therefore see our ``fillvalue`` of ``nan`` in the last two rows of the data.
#
2 changes: 1 addition & 1 deletion docs/gallery/general/read_basics.py
@@ -331,7 +331,7 @@
# object and accessing its attributes, but it may be useful to explore the data in a
# more interactive, visual way.
#
# You can use `NWBWidgets <https://github.com/NeurodataWithoutBorders/nwb-jupyter-widgets>`_,
# You can use `NWBWidgets <https://github.com/NeurodataWithoutBorders/nwbwidgets>`_,
# a package containing interactive widgets for visualizing NWB data,
# or you can use the `HDFView <https://www.hdfgroup.org/downloads/hdfview>`_
# tool, which can open any generic HDF5 file, which an NWB file is.
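####################
# For instance, a minimal (illustrative) NWBWidgets session in a Jupyter notebook could look
# like the following, assuming the package is installed and ``nwbfile`` is an open
# :py:class:`~pynwb.file.NWBFile`:
#
# .. code-block:: python
#
#    from nwbwidgets import nwb2widget
#
#    nwb2widget(nwbfile)  # builds an interactive widget tree for browsing the file contents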
3 changes: 3 additions & 0 deletions docs/make.bat
@@ -10,6 +10,7 @@ if "%SPHINXAPIDOC%" == "" (
)
set BUILDDIR=_build
set RSTDIR=source
set GALLERYDIR=gallery
set SRCDIR=../src
set PKGNAME=pynwb
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% %RSTDIR%
@@ -51,6 +52,8 @@ if "%1" == "clean" (
del /q /s %BUILDDIR%\*
del /q %RSTDIR%\%PKGNAME%*.rst
rmdir /q /s %RSTDIR%\tutorials
del /q /s %GALLERYDIR%\advanced_io\*.npy
del /q /s %GALLERYDIR%\advanced_io\*.nwb
goto end
)

1 change: 1 addition & 0 deletions docs/source/conf.py
@@ -151,6 +151,7 @@ def __call__(self, filename):
'nwb_extension': ('https://github.com/nwb-extensions/%s', ''),
'pynwb': ('https://github.com/NeurodataWithoutBorders/pynwb/%s', ''),
'nwb_overview': ('https://nwb-overview.readthedocs.io/en/latest/%s', ''),
'hdmf-docs': ('https://hdmf.readthedocs.io/en/stable/%s', ''),
'dandi': ('https://www.dandiarchive.org/%s', '')}

# Add any paths that contain templates here, relative to this directory.
2 changes: 1 addition & 1 deletion docs/source/index.rst
@@ -11,7 +11,7 @@
and would like to learn more, then please also visit the :nwb_overview:`NWB Overview <>`
website, which provides an entry point for researchers and developers interested in using NWB.

`Neurodata Without Borders (NWB) <http://www.nwb.org/>`_ is a project to develop a
`Neurodata Without Borders (NWB) <https://www.nwb.org/>`_ is a project to develop a
unified data format for cellular-based neurophysiology data, focused on the
dynamics of groups of neurons measured under a large range of experimental
conditions.
2 changes: 1 addition & 1 deletion docs/source/software_process.rst
@@ -30,7 +30,7 @@
codecov_, which shows line by line which lines are covered by the tests.

.. _coverage: https://coverage.readthedocs.io
.. _codecov: https://codecov.io/gh/NeurodataWithoutBorders/pynwb/tree/dev/src/pynwb
.. _codecov: https://app.codecov.io/gh/NeurodataWithoutBorders/pynwb/tree/dev/src/pynwb

--------------------------
Requirement Specifications