Add HDMF to PyNWB (#850)
* Replace FORM (pynwb.form) with HDMF (hdmf)

* add HDMF to requirements.txt

* get requirements from requirements.txt in setup.py
ajtritt authored Mar 14, 2019
1 parent 85eccb6 commit 2cbb725
Showing 88 changed files with 123 additions and 11,502 deletions.
2 changes: 1 addition & 1 deletion docs/code/creating-and-writing-nwbfile.py
@@ -136,7 +136,7 @@ def main():
# create-compressed-timeseries: start
from pynwb.ecephys import ElectricalSeries
from pynwb.behavior import SpatialSeries
from pynwb.form.backends.hdf5 import H5DataIO
from hdmf.backends.hdf5 import H5DataIO

ephys_ts = ElectricalSeries('test_compressed_ephys_data',
'an hypothetical source',
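For context, a minimal sketch of what the updated import path is used for; the array and compression setting below are illustrative and not part of this commit:

import numpy as np
from hdmf.backends.hdf5 import H5DataIO

raw = np.random.rand(1000, 4)                     # stand-in for recorded ephys values
wrapped = H5DataIO(data=raw, compression='gzip')  # gzip-compress the dataset on write
# 'wrapped' can then be passed as the ``data`` argument of an ElectricalSeries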
18 changes: 9 additions & 9 deletions docs/gallery/general/advanced_hdf5_io.py
@@ -8,11 +8,11 @@
'''

####################
# Wrapping data arrays with :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# Wrapping data arrays with :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`
# ---------------------------------------------------------------------------------
#
# In order to customize the I/O of datasets using the HDF5 I/O backend, we simply need to wrap our datasets
# using :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`. Using H5DataIO allows us to keep the Container
# using :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`. Using H5DataIO allows us to keep the Container
# classes independent of the I/O backend while still allowing us to customize HDF5-specific I/O features.
#
# Before we get started, let's create an NWBFile for testing so that we can add our data to it.
@@ -49,7 +49,7 @@
# Now let's say we want to compress the recorded data values. We simply need to wrap our data with H5DataIO.
# Everything else remains the same

from pynwb.form.backends.hdf5.h5_utils import H5DataIO
from hdmf.backends.hdf5.h5_utils import H5DataIO
wrapped_data = H5DataIO(data=data, compression=True) # <----
test_ts = TimeSeries(name='test_compressed_timeseries',
data=wrapped_data, # <----
@@ -59,7 +59,7 @@

####################
# This simple approach gives us access to a broad range of advanced I/O features, such as chunking and
# compression. For a complete list of all available settings see :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# compression. For a complete list of all available settings see :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`

####################
# Chunking
@@ -81,7 +81,7 @@


####################
# To use chunking, we again simply need to wrap our dataset via :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`.
# To use chunking, we again simply need to wrap our dataset via :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
# Using chunking also allows us to create resizable arrays simply by defining the ``maxshape`` of the array.

data = np.arange(10000).reshape((1000, 10))
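A sketch of the wrapping this enables (the chunk shape and ``maxshape`` values below are illustrative):

wrapped_data = H5DataIO(data=data,
                        chunks=(100, 10),     # explicit HDF5 chunk shape
                        maxshape=(None, 10))  # allow the first axis to grow later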
@@ -122,7 +122,7 @@
# read/write operations. I/O filters operate on a per-chunk basis in HDF5 and as such require the use of chunking.
# Chunking will be automatically enabled by h5py when compression and other I/O filters are enabled.
#
# To use compression, we can wrap our dataset using :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO` and
# To use compression, we can wrap our dataset using :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO` and
# define the appropriate options:

wrapped_data = H5DataIO(data=data,
@@ -139,7 +139,7 @@
####################
# .. hint::
#
# In addition to ``compression``, :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO` also allows us to
# In addition to ``compression``, :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO` also allows us to
# enable the ``shuffle`` and ``fletcher32`` HDF5 I/O filters.
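For example, a sketch combining gzip compression with both filters (settings are illustrative):

wrapped_data = H5DataIO(data=data,
                        compression='gzip',
                        compression_opts=4,   # gzip compression level
                        shuffle=True,         # byte shuffle to improve compression
                        fletcher32=True)      # checksum filter for error detection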

####################
@@ -200,10 +200,10 @@


####################
# Wrapping ``h5py.Datasets`` with :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# Wrapping ``h5py.Datasets`` with :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`
# ------------------------------------------------------------------------------------------------
#
# Just for completeness, :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO` also allows us to customize
# Just for completeness, :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO` also allows us to customize
# how ``h5py.Dataset`` objects should be handled on write by PyNWB's HDF5 backend via the ``link_data``
# parameter. If ``link_data`` is set to ``True`` then a ``SoftLink`` or ``ExternalLink`` will be created to
# point to the HDF5 dataset. On the other hand, if ``link_data`` is set to ``False`` then the dataset
12 changes: 6 additions & 6 deletions docs/gallery/general/extensions.py
@@ -28,7 +28,7 @@
#
# The following block of code demonstrates how to create a new namespace, and then add a new `neurodata_type`
# to this namespace. Finally,
# it calls :py:meth:`~pynwb.form.spec.write.NamespaceBuilder.export` to save the extensions to disk for downstream use.
# it calls :py:meth:`~hdmf.spec.write.NamespaceBuilder.export` to save the extensions to disk for downstream use.

from pynwb.spec import NWBNamespaceBuilder, NWBGroupSpec, NWBAttributeSpec
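A sketch of how these classes combine to define and export an extension; the namespace, type name, and attribute below are illustrative placeholders:

ns_builder = NWBNamespaceBuilder('Extension for use in my lab', 'mylab')
ext = NWBGroupSpec('A custom ElectricalSeries for my lab',
                   attributes=[NWBAttributeSpec('trode_id', 'the tetrode id', 'int')],
                   neurodata_type_inc='ElectricalSeries',
                   neurodata_type_def='TetrodeSeries')
ns_builder.add_spec('mylab.extensions.yaml', ext)
ns_builder.export('mylab.namespace.yaml')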

@@ -96,7 +96,7 @@

from pynwb import register_class, load_namespaces
from pynwb.ecephys import ElectricalSeries
from pynwb.form.utils import docval, call_docval_func, getargs, get_docval
from hdmf.utils import docval, call_docval_func, getargs, get_docval

ns_path = "mylab.namespace.yaml"
load_namespaces(ns_path)
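A sketch of how the imported helpers typically combine when registering a class for an extension type (the ``TetrodeSeries`` name and ``trode_id`` field are illustrative):

@register_class('TetrodeSeries', 'mylab')
class TetrodeSeries(ElectricalSeries):

    __nwbfields__ = ('trode_id',)

    @docval(*get_docval(ElectricalSeries.__init__),
            {'name': 'trode_id', 'type': int, 'doc': 'the tetrode id'})
    def __init__(self, **kwargs):
        call_docval_func(super(TetrodeSeries, self).__init__, kwargs)
        self.trode_id = getargs('trode_id', kwargs)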
@@ -117,9 +117,9 @@ def __init__(self, **kwargs):
####################
# .. note::
#
# See the API docs for more information about :py:func:`~pynwb.form.utils.docval`,
# :py:func:`~pynwb.form.utils.call_docval_func`, :py:func:`~pynwb.form.utils.getargs`
# and :py:func:`~pynwb.form.utils.get_docval`
# See the API docs for more information about :py:func:`~hdmf.utils.docval`,
# :py:func:`~hdmf.utils.call_docval_func`, :py:func:`~hdmf.utils.getargs`
# and :py:func:`~hdmf.utils.get_docval`
#
# When extending :py:class:`~pynwb.core.NWBContainer` or its
# subclasses, you should define the class field ``__nwbfields__``. This will
@@ -151,7 +151,7 @@ def __init__(self, **kwargs):
# -----------------------------------------------------
#
# Extensions can be cached to file so that your NWB file carries with it the extensions needed to read the file.
# This is done by setting *cache_spec* to *True* when calling :py:meth:`~pynwb.form.backends.hdf5.h5tools.HDF5IO.write`
# This is done by setting *cache_spec* to *True* when calling :py:meth:`~hdmf.backends.hdf5.h5tools.HDF5IO.write`
# on :py:class:`~pynwb.NWBHDF5IO` (See :ref:`basic_writing` for more on writing NWB files).
#
# To demonstrate this, first we will make some fake data using our extensions.
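To illustrate the ``cache_spec`` flag described above, a minimal sketch (the filename is illustrative, and ``nwbfile`` is assumed to be an NWBFile containing instances of the extension types):

from pynwb import NWBHDF5IO

with NWBHDF5IO('cache_spec_example.nwb', mode='w') as io:
    io.write(nwbfile, cache_spec=True)  # the extension specs travel with the file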
66 changes: 33 additions & 33 deletions docs/gallery/general/iterative_write.py
@@ -53,20 +53,20 @@
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# In PyNWB the process of iterating over large data arrays is implemented via the concept of
# :py:class:`~pynwb.form.data_utils.DataChunk` and :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`.
# :py:class:`~hdmf.data_utils.DataChunk` and :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.
#
# * :py:class:`~pynwb.form.data_utils.DataChunk` is a simple data structure used to describe
# * :py:class:`~hdmf.data_utils.DataChunk` is a simple data structure used to describe
# a subset of a larger data array (i.e., a data chunk), consisting of:
#
# * ``DataChunk.data`` : the array with the data value(s) of the chunk and
# * ``DataChunk.selection`` : the NumPy index tuple describing the location of the chunk in the whole array.
#
# * :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator` then defines a class for iterating over large
# data arrays one-:py:class:`~pynwb.form.data_utils.DataChunk`-at-a-time.
# * :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` then defines a class for iterating over large
# data arrays one-:py:class:`~hdmf.data_utils.DataChunk`-at-a-time.
#
# * :py:class:`~pynwb.form.data_utils.DataChunkIterator` is a specific implementation of an
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator` that accepts any iterable and assumes
# that we iterate over the first dimension of the data array. :py:class:`~pynwb.form.data_utils.DataChunkIterator`
# * :py:class:`~hdmf.data_utils.DataChunkIterator` is a specific implementation of an
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` that accepts any iterable and assumes
# that we iterate over the first dimension of the data array. :py:class:`~hdmf.data_utils.DataChunkIterator`
# also supports buffered read, i.e., multiple values from the input iterator can be combined into a single chunk.
# This is useful for buffered I/O operations, e.g., to improve performance by accumulating data in memory and
# writing larger blocks at once.
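As a concrete illustration of the structure described above (the values are arbitrary):

import numpy as np
from hdmf.data_utils import DataChunk

# a chunk holding the first ten rows of a larger (N, 10) array
chunk = DataChunk(data=np.arange(100).reshape(10, 10),
                  selection=np.s_[0:10, :])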
@@ -77,17 +77,17 @@
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# On the front end, all a user needs to do is to create or wrap their data in a
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`. The I/O backend (e.g.,
# :py:class:`~pynwb.form.backends.hdf5.h5tools.HDF5IO` or :py:class:`~pynwb.NWBHDF5IO`) then
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`. The I/O backend (e.g.,
# :py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO` or :py:class:`~pynwb.NWBHDF5IO`) then
# implements the iterative processing of the data chunk iterators. PyNWB also provides, with
# :py:class:`~pynwb.form.data_utils.DataChunkIterator`, a specific implementation of a data chunk iterator
# :py:class:`~hdmf.data_utils.DataChunkIterator`, a specific implementation of a data chunk iterator
# which we can use to wrap common iterable types (e.g., generators, lists, or numpy arrays).
# For more advanced use cases we then need to implement our own derived class of
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`.
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.
#
# .. tip::
#
# Currently the HDF5 I/O backend of PyNWB (:py:class:`~pynwb.form.backends.hdf5.h5tools.HDF5IO`,
# Currently the HDF5 I/O backend of PyNWB (:py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO`,
# :py:class:`~pynwb.NWBHDF5IO`) processes iterative data writes one-dataset-at-a-time. This means that
# while you may have an arbitrary number of iterative data writes, the write is performed in order.
# In the future we may use a queuing process to enable the simultaneous processing of multiple iterative writes at
@@ -172,7 +172,7 @@ def iter_sin(chunk_length=10, max_chunks=100):
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#

from pynwb.form.data_utils import DataChunkIterator
from hdmf.data_utils import DataChunkIterator

data = DataChunkIterator(data=iter_sin(10))
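To inspect what the wrapper inferred, a sketch like the following would print the values discussed below:

print("maxshape=%s, recommended_data_shape=%s, dtype=%s" %
      (str(data.maxshape), str(data.recommended_data_shape()), str(data.dtype)))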

@@ -201,22 +201,22 @@ def iter_sin(chunk_length=10, max_chunks=100):
#
# maxshape=(None, 10), recommended_data_shape=(1, 10), dtype=float64
#
# As we can see :py:class:`~pynwb.form.data_utils.DataChunkIterator` automatically recommends
# As we can see :py:class:`~hdmf.data_utils.DataChunkIterator` automatically recommends
# in its ``maxshape`` that the first dimension of our array should be unlimited (``None``) and the second
# dimension be ``10`` (i.e., the length of our chunk). Since :py:class:`~pynwb.form.data_utils.DataChunkIterator`
# dimension be ``10`` (i.e., the length of our chunk). Since :py:class:`~hdmf.data_utils.DataChunkIterator`
# has no way of knowing the minimum size of the array, it automatically recommends the size of the first
# chunk as the minimum size (i.e., ``(1, 10)``) and also infers the data type automatically from the first chunk.
# To further customize this behavior we may also define the ``maxshape``, ``dtype``, and ``buffer_size`` when
# we create the :py:class:`~pynwb.form.data_utils.DataChunkIterator`.
# we create the :py:class:`~hdmf.data_utils.DataChunkIterator`.
#
# .. tip::
#
# Here we used :py:class:`~pynwb.form.data_utils.DataChunkIterator` to conveniently wrap our data stream.
# :py:class:`~pynwb.form.data_utils.DataChunkIterator` assumes that our generator yields, in **consecutive order**, a
# Here we used :py:class:`~hdmf.data_utils.DataChunkIterator` to conveniently wrap our data stream.
# :py:class:`~hdmf.data_utils.DataChunkIterator` assumes that our generator yields, in **consecutive order**, a
# **single** complete element along the **first dimension** of our array (i.e., it iterates over the first
# axis and yields one element at a time). This behavior is useful in many practical cases. However, if
# this strategy does not match our needs, we can alternatively implement our own derived
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`. We show an example of this next.
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`. We show an example of this next.
#
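Before the full example, a bare-bones sketch of the interface such a custom iterator fills in (the shapes and dtype are placeholders):

import numpy as np
from hdmf.data_utils import AbstractDataChunkIterator, DataChunk


class MyCustomIterator(AbstractDataChunkIterator):

    def __iter__(self):
        return self

    def __next__(self):
        # compute or read the next block, then describe where it belongs, e.g.:
        # return DataChunk(data=block, selection=np.s_[start:stop, :])
        raise StopIteration

    next = __next__  # Python 2 compatibility

    def recommended_chunk_shape(self):
        return None  # no particular chunk shape to recommend

    def recommended_data_shape(self):
        return (1, 10)  # minimal shape to allocate initially

    @property
    def dtype(self):
        return np.dtype('float64')

    @property
    def maxshape(self):
        return (None, 10)  # first axis unlimited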


@@ -227,7 +227,7 @@ def iter_sin(chunk_length=10, max_chunks=100):
# Step 1: Create a data chunk iterator for our sparse matrix
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

from pynwb.form.data_utils import AbstractDataChunkIterator, DataChunk
from hdmf.data_utils import AbstractDataChunkIterator, DataChunk


class SparseMatrixIterator(AbstractDataChunkIterator):
@@ -306,8 +306,8 @@ def maxshape(self):

#####################
# In order to also enable compression and other advanced HDF5 dataset I/O features we can then also
# wrap our data via :py:class:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`.
from pynwb.form.backends.hdf5.h5_utils import H5DataIO
# wrap our data via :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
from hdmf.backends.hdf5.h5_utils import H5DataIO
matrix2 = SparseMatrixIterator(shape=(xsize, ysize),
num_chunks=num_chunks,
chunk_shape=chunk_shape)
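The wrapping itself (elided in this view) might look like the following sketch:

data2 = H5DataIO(data=matrix2,
                 compression='gzip',
                 compression_opts=4)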
@@ -318,7 +318,7 @@ def maxshape(self):
######################
# We can now also customize the chunking, fillvalue, and other settings
#
from pynwb.form.backends.hdf5.h5_utils import H5DataIO
from hdmf.backends.hdf5.h5_utils import H5DataIO

# Increase the chunk size and add compression
matrix3 = SparseMatrixIterator(shape=(xsize, ysize),
@@ -427,7 +427,7 @@ def maxshape(self):
#
# **Advantages:**
#
# * We only need to hold one :py:class:`~pynwb.form.data_utils.DataChunk` in memory at any given time
# * We only need to hold one :py:class:`~hdmf.data_utils.DataChunk` in memory at any given time
# * Only the data chunks in the HDF5 file that contain non-default values are ever being allocated
# * The overall size of our file is reduced significantly
# * Reduced I/O load
@@ -437,7 +437,7 @@ def maxshape(self):
#
# With great power comes great responsibility **!** I/O and storage cost will depend, among other factors, on the chunk size,
# compression options, and the write pattern, i.e., the number and structure of the
# :py:class:`~pynwb.form.data_utils.DataChunk` objects written. For example, using ``(1,1)`` chunks and writing them
# :py:class:`~hdmf.data_utils.DataChunk` objects written. For example, using ``(1,1)`` chunks and writing them
# one value at a time would result in poor I/O performance in most practical cases, because of the large number of
# chunks and large number of small I/O operations required.
#
@@ -489,7 +489,7 @@ def maxshape(self):
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Note, we use a generator here for simplicity, but we could equally well implement our own
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`.
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.


def iter_largearray(filename, shape, dtype='float64'):
@@ -510,7 +510,7 @@ def iter_largearray(filename, shape, dtype='float64'):
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#

from pynwb.form.data_utils import DataChunkIterator
from hdmf.data_utils import DataChunkIterator

data = DataChunkIterator(data=iter_largearray(filename='basic_sparse_iterwrite_testdata.npy',
shape=datashape),
@@ -530,8 +530,8 @@ def iter_largearray(filename, shape, dtype='float64'):
# .. tip::
#
# Again, if we want to explicitly control how our data will be chunked (compressed etc.)
# in the HDF5 file then we need to wrap our :py:class:`~pynwb.form.data_utils.DataChunkIterator`
# using :py:class:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# in the HDF5 file then we need to wrap our :py:class:`~hdmf.data_utils.DataChunkIterator`
# using :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`
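For example (settings illustrative, with ``data`` being the DataChunkIterator from above):

from hdmf.backends.hdf5.h5_utils import H5DataIO
wrapped = H5DataIO(data=data, chunks=(100, 10), compression='gzip')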

####################
# Discussion
@@ -589,7 +589,7 @@ def iter_largearray(filename, shape, dtype='float64'):
# Step 1: Create a data chunk iterator for our multifile array
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

from pynwb.form.data_utils import AbstractDataChunkIterator, DataChunk # noqa
from hdmf.data_utils import AbstractDataChunkIterator, DataChunk # noqa


class MultiFileArrayIterator(AbstractDataChunkIterator):
@@ -666,16 +666,16 @@ def maxshape(self):
#
# Common mistakes that will result in errors on write:
#
# * The size of a :py:class:`~pynwb.form.data_utils.DataChunk` does not match the selection.
# * The selection for the :py:class:`~pynwb.form.data_utils.DataChunk` is not supported by h5py
# * The size of a :py:class:`~hdmf.data_utils.DataChunk` does not match the selection.
# * The selection for the :py:class:`~hdmf.data_utils.DataChunk` is not supported by h5py
# (e.g., unordered lists etc.)
#
# Other common mistakes:
#
# * Choosing inappropriate chunk sizes. This typically means bad performance with regard to I/O and/or storage cost.
# * Using auto chunking without supplying a good recommended_data_shape. h5py auto chunking can only make a good
# guess of what the chunking should be if it (at least roughly) knows what the shape of the array will be.
# * Trying to wrap a data generator using the default :py:class:`~pynwb.form.data_utils.DataChunkIterator`
# * Trying to wrap a data generator using the default :py:class:`~hdmf.data_utils.DataChunkIterator`
# when the generator does not comply with the assumptions of the default implementation (i.e., yield
# individual, complete elements along the first dimension of the array one-at-a-time). Depending on the generator,
# this may or may not result in an error on write, but the array you are generating will probably end up
10 changes: 5 additions & 5 deletions docs/gallery/general/linking_data.py
@@ -151,9 +151,9 @@
# our TimeSeries, this means that :py:class:`~pynwb.NWBHDF5IO` will need to
# determine on write how to treat the dataset. We can make this explicit and customize this
# behavior on a per-dataset basis by wrapping our dataset using
# :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`

from pynwb.form.backends.hdf5.h5_utils import H5DataIO
from hdmf.backends.hdf5.h5_utils import H5DataIO

# Create another timeseries that links to the same data
test_ts5 = TimeSeries(name='test_timeseries5',
@@ -188,7 +188,7 @@
# ---------------------------
#
# Appending to files and linking is made possible by passing around the same
# :py:class:`~pynwb.form.build.map.BuildManager`. You can get a manager to pass around
# :py:class:`~hdmf.build.map.BuildManager`. You can get a manager to pass around
# using the :py:meth:`~pynwb.get_manager` function.
#
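A sketch of the pattern (the filenames are illustrative):

from pynwb import NWBHDF5IO, get_manager

manager = get_manager()
io1 = NWBHDF5IO('source_data.nwb', mode='r', manager=manager)
nwbfile1 = io1.read()
# open the file that will link to this data with the same manager
io2 = NWBHDF5IO('linking_file.nwb', mode='w', manager=manager)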

@@ -245,5 +245,5 @@
#
# External links are convenient, but to share data we may want to hand a single file with all the
# data to our collaborator rather than having to collect all relevant files. To do this,
# :py:class:`~pynwb.form.backends.hdf5.h5tools.HDF5IO` (and in turn :py:class:`~pynwb.NWBHDF5IO`)
# provides the convenience function :py:func:`~pynwb.form.backends.hdf5.h5tools.HDF5IO.copy_file`
# :py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO` (and in turn :py:class:`~pynwb.NWBHDF5IO`)
# provides the convenience function :py:func:`~hdmf.backends.hdf5.h5tools.HDF5IO.copy_file`
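A sketch of its use, assuming the documented signature (paths are illustrative):

from hdmf.backends.hdf5.h5tools import HDF5IO

HDF5IO.copy_file(source_filename='linked_file.nwb',
                 dest_filename='shareable_file.nwb',
                 expand_external=True)  # resolve external links into actual copies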
2 changes: 1 addition & 1 deletion requirements-dev.txt
@@ -1,4 +1,4 @@
certifi==2018.1.18
certifi==2019.3.9
chardet==3.0.4
codecov==2.0.15
configparser==3.5.0
3 changes: 2 additions & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
certifi==2018.1.18
certifi==2019.3.9
chardet==3.0.4
h5py==2.9.0
idna==2.6
@@ -9,3 +9,4 @@ ruamel.yaml==0.15.85
six==1.11.0
urllib3==1.23
pandas==0.23.4
hdmf==1.0.1