Add HDMF to PyNWB (#850)
* Replace FORM (pynwb.form) with HDMF (hdmf)

* add HDMF to requirements.txt

* get requirements from requirements.txt in setup.py
ajtritt authored Mar 14, 2019
1 parent 85eccb6 commit 2cbb725
Showing 88 changed files with 123 additions and 11,502 deletions.
2 changes: 1 addition & 1 deletion docs/code/creating-and-writing-nwbfile.py
@@ -136,7 +136,7 @@ def main():
# create-compressed-timeseries: start
from pynwb.ecephys import ElectricalSeries
from pynwb.behavior import SpatialSeries
from pynwb.form.backends.hdf5 import H5DataIO
from hdmf.backends.hdf5 import H5DataIO

ephys_ts = ElectricalSeries('test_compressed_ephys_data',
'an hypothetical source',
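For context, a minimal sketch of what the updated import path is used for; the array and compression setting below are illustrative and not part of this commit:

import numpy as np
from hdmf.backends.hdf5 import H5DataIO

raw = np.random.rand(1000, 4)                     # stand-in for recorded ephys values
wrapped = H5DataIO(data=raw, compression='gzip')  # gzip-compress the dataset on write
# 'wrapped' can then be passed as the ``data`` argument of an ElectricalSeries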
18 changes: 9 additions & 9 deletions docs/gallery/general/advanced_hdf5_io.py
@@ -8,11 +8,11 @@
'''

####################
# Wrapping data arrays with :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# Wrapping data arrays with :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`
# ---------------------------------------------------------------------------------
#
# In order to customize the I/O of datasets using the HDF5 I/O backend, we simply need to wrap our datasets
# using :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`. Using H5DataIO allows us to keep the Container
# using :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`. Using H5DataIO allows us to keep the Container
# classes independent of the I/O backend while still allowing us to customize HDF5-specific I/O features.
#
# Before we get started, let's create an NWBFile for testing so that we can add our data to it.
@@ -49,7 +49,7 @@
# Now let's say we want to compress the recorded data values. We simply need to wrap our data with H5DataIO.
# Everything else remains the same

from pynwb.form.backends.hdf5.h5_utils import H5DataIO
from hdmf.backends.hdf5.h5_utils import H5DataIO
wrapped_data = H5DataIO(data=data, compression=True) # <----
test_ts = TimeSeries(name='test_compressed_timeseries',
data=wrapped_data, # <----
@@ -59,7 +59,7 @@

####################
# This simple approach gives us access to a broad range of advanced I/O features, such as chunking and
# compression. For a complete list of all available settings see :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# compression. For a complete list of all available settings see :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`

####################
# Chunking
@@ -81,7 +81,7 @@


####################
# To use chunking, we again simply need to wrap our dataset via :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`.
# To use chunking, we again simply need to wrap our dataset via :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
# Using chunking also allows us to create resizable arrays simply by defining the ``maxshape`` of the array.

data = np.arange(10000).reshape((1000, 10))
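A sketch of the wrapping this enables (the chunk shape and ``maxshape`` values below are illustrative):

wrapped_data = H5DataIO(data=data,
                        chunks=(100, 10),     # explicit HDF5 chunk shape
                        maxshape=(None, 10))  # allow the first axis to grow later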
@@ -122,7 +122,7 @@
# read/write operations. I/O filters operate on a per-chunk basis in HDF5 and as such require the use of chunking.
# Chunking will be automatically enabled by h5py when compression and other I/O filters are enabled.
#
# To use compression, we can wrap our dataset using :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO` and
# To use compression, we can wrap our dataset using :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO` and
# define the appropriate options:

wrapped_data = H5DataIO(data=data,
@@ -139,7 +139,7 @@
####################
# .. hint::
#
# In addition to ``compression``, :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO` also allows us to
# In addition to ``compression``, :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO` also allows us to
# enable the ``shuffle`` and ``fletcher32`` HDF5 I/O filters.
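For example, a sketch combining gzip compression with both filters (settings are illustrative):

wrapped_data = H5DataIO(data=data,
                        compression='gzip',
                        compression_opts=4,   # gzip compression level
                        shuffle=True,         # byte shuffle to improve compression
                        fletcher32=True)      # checksum filter for error detection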

####################
@@ -200,10 +200,10 @@


####################
# Wrapping ``h5py.Datasets`` with :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# Wrapping ``h5py.Datasets`` with :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`
# ------------------------------------------------------------------------------------------------
#
# Just for completeness, :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO` also allows us to customize
# Just for completeness, :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO` also allows us to customize
# how ``h5py.Dataset`` objects should be handled on write by PyNWB's HDF5 backend via the ``link_data``
# parameter. If ``link_data`` is set to ``True`` then a ``SoftLink`` or ``ExternalLink`` will be created to
# point to the HDF5 dataset. On the other hand, if ``link_data`` is set to ``False`` then the dataset
12 changes: 6 additions & 6 deletions docs/gallery/general/extensions.py
@@ -28,7 +28,7 @@
#
# The following block of code demonstrates how to create a new namespace, and then add a new `neurodata_type`
# to this namespace. Finally,
# it calls :py:meth:`~pynwb.form.spec.write.NamespaceBuilder.export` to save the extensions to disk for downstream use.
# it calls :py:meth:`~hdmf.spec.write.NamespaceBuilder.export` to save the extensions to disk for downstream use.

from pynwb.spec import NWBNamespaceBuilder, NWBGroupSpec, NWBAttributeSpec
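A sketch of how these classes combine to define and export an extension; the namespace, type name, and attribute below are illustrative placeholders:

ns_builder = NWBNamespaceBuilder('Extension for use in my lab', 'mylab')
ext = NWBGroupSpec('A custom ElectricalSeries for my lab',
                   attributes=[NWBAttributeSpec('trode_id', 'the tetrode id', 'int')],
                   neurodata_type_inc='ElectricalSeries',
                   neurodata_type_def='TetrodeSeries')
ns_builder.add_spec('mylab.extensions.yaml', ext)
ns_builder.export('mylab.namespace.yaml')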

@@ -96,7 +96,7 @@

from pynwb import register_class, load_namespaces
from pynwb.ecephys import ElectricalSeries
from pynwb.form.utils import docval, call_docval_func, getargs, get_docval
from hdmf.utils import docval, call_docval_func, getargs, get_docval

ns_path = "mylab.namespace.yaml"
load_namespaces(ns_path)
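A sketch of how the imported helpers typically combine when registering a class for an extension type (the ``TetrodeSeries`` name and ``trode_id`` field are illustrative):

@register_class('TetrodeSeries', 'mylab')
class TetrodeSeries(ElectricalSeries):

    __nwbfields__ = ('trode_id',)

    @docval(*get_docval(ElectricalSeries.__init__),
            {'name': 'trode_id', 'type': int, 'doc': 'the tetrode id'})
    def __init__(self, **kwargs):
        call_docval_func(super(TetrodeSeries, self).__init__, kwargs)
        self.trode_id = getargs('trode_id', kwargs)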
@@ -117,9 +117,9 @@ def __init__(self, **kwargs):
####################
# .. note::
#
# See the API docs for more information about :py:func:`~pynwb.form.utils.docval`,
# :py:func:`~pynwb.form.utils.call_docval_func`, :py:func:`~pynwb.form.utils.getargs`
# and :py:func:`~pynwb.form.utils.get_docval`
# See the API docs for more information about :py:func:`~hdmf.utils.docval`,
# :py:func:`~hdmf.utils.call_docval_func`, :py:func:`~hdmf.utils.getargs`
# and :py:func:`~hdmf.utils.get_docval`
#
# When extending :py:class:`~pynwb.core.NWBContainer` or its
# subclasses, you should define the class field ``__nwbfields__``. This will
@@ -151,7 +151,7 @@ def __init__(self, **kwargs):
# -----------------------------------------------------
#
# Extensions can be cached to file so that your NWB file carries with it the extensions needed to read the file.
# This is done by setting *cache_spec* to *True* when calling :py:meth:`~pynwb.form.backends.hdf5.h5tools.HDF5IO.write`
# This is done by setting *cache_spec* to *True* when calling :py:meth:`~hdmf.backends.hdf5.h5tools.HDF5IO.write`
# on :py:class:`~pynwb.NWBHDF5IO` (See :ref:`basic_writing` for more on writing NWB files).
#
# To demonstrate this, first we will make some fake data using our extensions.
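To illustrate the ``cache_spec`` flag described above, a minimal sketch (the filename is illustrative, and ``nwbfile`` is assumed to be an NWBFile containing instances of the extension types):

from pynwb import NWBHDF5IO

with NWBHDF5IO('cache_spec_example.nwb', mode='w') as io:
    io.write(nwbfile, cache_spec=True)  # the extension specs travel with the file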
66 changes: 33 additions & 33 deletions docs/gallery/general/iterative_write.py
@@ -53,20 +53,20 @@
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# In PyNWB the process of iterating over large data arrays is implemented via the concept of
# :py:class:`~pynwb.form.data_utils.DataChunk` and :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`.
# :py:class:`~hdmf.data_utils.DataChunk` and :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.
#
# * :py:class:`~pynwb.form.data_utils.DataChunk` is a simple data structure used to describe
# * :py:class:`~hdmf.data_utils.DataChunk` is a simple data structure used to describe
# a subset of a larger data array (i.e., a data chunk), consisting of:
#
# * ``DataChunk.data`` : the array with the data value(s) of the chunk and
# * ``DataChunk.selection`` : the NumPy index tuple describing the location of the chunk in the whole array.
#
# * :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator` then defines a class for iterating over large
# data arrays one-:py:class:`~pynwb.form.data_utils.DataChunk`-at-a-time.
# * :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` then defines a class for iterating over large
# data arrays one-:py:class:`~hdmf.data_utils.DataChunk`-at-a-time.
#
# * :py:class:`~pynwb.form.data_utils.DataChunkIterator` is a specific implementation of an
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator` that accepts any iterable and assumes
# that we iterate over the first dimension of the data array. :py:class:`~pynwb.form.data_utils.DataChunkIterator`
# * :py:class:`~hdmf.data_utils.DataChunkIterator` is a specific implementation of an
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` that accepts any iterable and assumes
# that we iterate over the first dimension of the data array. :py:class:`~hdmf.data_utils.DataChunkIterator`
# also supports buffered read, i.e., multiple values from the input iterator can be combined into a single chunk.
# This is useful for buffered I/O operations, e.g., to improve performance by accumulating data in memory and
# writing larger blocks at once.
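As a concrete illustration of the structure described above (the values are arbitrary):

import numpy as np
from hdmf.data_utils import DataChunk

# a chunk holding the first ten rows of a larger (N, 10) array
chunk = DataChunk(data=np.arange(100).reshape(10, 10),
                  selection=np.s_[0:10, :])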
@@ -77,17 +77,17 @@
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# On the front end, all a user needs to do is to create or wrap their data in a
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`. The I/O backend (e.g.,
# :py:class:`~pynwb.form.backends.hdf5.h5tools.HDF5IO` or :py:class:`~pynwb.NWBHDF5IO`) then
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`. The I/O backend (e.g.,
# :py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO` or :py:class:`~pynwb.NWBHDF5IO`) then
# implements the iterative processing of the data chunk iterators. PyNWB also provides, with
# :py:class:`~pynwb.form.data_utils.DataChunkIterator`, a specific implementation of a data chunk iterator
# :py:class:`~hdmf.data_utils.DataChunkIterator`, a specific implementation of a data chunk iterator
# which we can use to wrap common iterable types (e.g., generators, lists, or numpy arrays).
# For more advanced use cases we then need to implement our own derived class of
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`.
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.
#
# .. tip::
#
# Currently the HDF5 I/O backend of PyNWB (:py:class:`~pynwb.form.backends.hdf5.h5tools.HDF5IO`,
# Currently the HDF5 I/O backend of PyNWB (:py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO`,
# :py:class:`~pynwb.NWBHDF5IO`) processes iterative data writes one-dataset-at-a-time. This means that
# while you may have an arbitrary number of iterative data writes, the write is performed in order.
# In the future we may use a queuing process to enable the simultaneous processing of multiple iterative writes at
@@ -172,7 +172,7 @@ def iter_sin(chunk_length=10, max_chunks=100):
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#

from pynwb.form.data_utils import DataChunkIterator
from hdmf.data_utils import DataChunkIterator

data = DataChunkIterator(data=iter_sin(10))
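To inspect what the wrapper inferred, a sketch like the following would print the values discussed below:

print("maxshape=%s, recommended_data_shape=%s, dtype=%s" %
      (str(data.maxshape), str(data.recommended_data_shape()), str(data.dtype)))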

@@ -201,22 +201,22 @@ def iter_sin(chunk_length=10, max_chunks=100):
#
# maxshape=(None, 10), recommended_data_shape=(1, 10), dtype=float64
#
# As we can see :py:class:`~pynwb.form.data_utils.DataChunkIterator` automatically recommends
# As we can see :py:class:`~hdmf.data_utils.DataChunkIterator` automatically recommends
# in its ``maxshape`` that the first dimension of our array should be unlimited (``None``) and the second
# dimension be ``10`` (i.e., the length of our chunk). Since :py:class:`~pynwb.form.data_utils.DataChunkIterator`
# dimension be ``10`` (i.e., the length of our chunk). Since :py:class:`~hdmf.data_utils.DataChunkIterator`
# has no way of knowing the minimum size of the array, it automatically recommends the size of the first
# chunk as the minimum size (i.e., ``(1, 10)``) and also infers the data type automatically from the first chunk.
# To further customize this behavior we may also define the ``maxshape``, ``dtype``, and ``buffer_size`` when
# we create the :py:class:`~pynwb.form.data_utils.DataChunkIterator`.
# we create the :py:class:`~hdmf.data_utils.DataChunkIterator`.
#
# .. tip::
#
# Here we used :py:class:`~pynwb.form.data_utils.DataChunkIterator` to conveniently wrap our data stream.
# :py:class:`~pynwb.form.data_utils.DataChunkIterator` assumes that our generator yields, in **consecutive order**, a
# Here we used :py:class:`~hdmf.data_utils.DataChunkIterator` to conveniently wrap our data stream.
# :py:class:`~hdmf.data_utils.DataChunkIterator` assumes that our generator yields, in **consecutive order**, a
# **single** complete element along the **first dimension** of our array (i.e., it iterates over the first
# axis and yields one element at a time). This behavior is useful in many practical cases. However, if
# this strategy does not match our needs, we can alternatively implement our own derived
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`. We show an example of this next.
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`. We show an example of this next.
#
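Before the full example, a bare-bones sketch of the interface such a custom iterator fills in (the shapes and dtype are placeholders):

import numpy as np
from hdmf.data_utils import AbstractDataChunkIterator, DataChunk


class MyCustomIterator(AbstractDataChunkIterator):

    def __iter__(self):
        return self

    def __next__(self):
        # compute or read the next block, then describe where it belongs, e.g.:
        # return DataChunk(data=block, selection=np.s_[start:stop, :])
        raise StopIteration

    next = __next__  # Python 2 compatibility

    def recommended_chunk_shape(self):
        return None  # no particular chunk shape to recommend

    def recommended_data_shape(self):
        return (1, 10)  # minimal shape to allocate initially

    @property
    def dtype(self):
        return np.dtype('float64')

    @property
    def maxshape(self):
        return (None, 10)  # first axis unlimited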


@@ -227,7 +227,7 @@ def iter_sin(chunk_length=10, max_chunks=100):
# Step 1: Create a data chunk iterator for our sparse matrix
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

from pynwb.form.data_utils import AbstractDataChunkIterator, DataChunk
from hdmf.data_utils import AbstractDataChunkIterator, DataChunk


class SparseMatrixIterator(AbstractDataChunkIterator):
@@ -306,8 +306,8 @@ def maxshape(self):

#####################
# In order to also enable compression and other advanced HDF5 dataset I/O features we can then also
# wrap our data via :py:class:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`.
from pynwb.form.backends.hdf5.h5_utils import H5DataIO
# wrap our data via :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
from hdmf.backends.hdf5.h5_utils import H5DataIO
matrix2 = SparseMatrixIterator(shape=(xsize, ysize),
num_chunks=num_chunks,
chunk_shape=chunk_shape)
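The wrapping itself (elided in this view) might look like the following sketch:

data2 = H5DataIO(data=matrix2,
                 compression='gzip',
                 compression_opts=4)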
@@ -318,7 +318,7 @@ def maxshape(self):
######################
# We can now also customize the chunking, fillvalue, and other settings
#
from pynwb.form.backends.hdf5.h5_utils import H5DataIO
from hdmf.backends.hdf5.h5_utils import H5DataIO

# Increase the chunk size and add compression
matrix3 = SparseMatrixIterator(shape=(xsize, ysize),
@@ -427,7 +427,7 @@ def maxshape(self):
#
# **Advantages:**
#
# * We only need to hold one :py:class:`~pynwb.form.data_utils.DataChunk` in memory at any given time
# * We only need to hold one :py:class:`~hdmf.data_utils.DataChunk` in memory at any given time
# * Only the data chunks in the HDF5 file that contain non-default values are ever being allocated
# * The overall size of our file is reduced significantly
# * Reduced I/O load
@@ -437,7 +437,7 @@ def maxshape(self):
#
# With great power comes great responsibility **!** I/O and storage cost will depend, among other factors, on the chunk size,
# compression options, and the write pattern, i.e., the number and structure of the
# :py:class:`~pynwb.form.data_utils.DataChunk` objects written. For example, using ``(1,1)`` chunks and writing them
# :py:class:`~hdmf.data_utils.DataChunk` objects written. For example, using ``(1,1)`` chunks and writing them
# one value at a time would result in poor I/O performance in most practical cases, because of the large number of
# chunks and large number of small I/O operations required.
#
@@ -489,7 +489,7 @@ def maxshape(self):
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Note, we use a generator here for simplicity, but we could equally well implement our own
# :py:class:`~pynwb.form.data_utils.AbstractDataChunkIterator`.
# :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.


def iter_largearray(filename, shape, dtype='float64'):
@@ -510,7 +510,7 @@ def iter_largearray(filename, shape, dtype='float64'):
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#

from pynwb.form.data_utils import DataChunkIterator
from hdmf.data_utils import DataChunkIterator

data = DataChunkIterator(data=iter_largearray(filename='basic_sparse_iterwrite_testdata.npy',
shape=datashape),
@@ -530,8 +530,8 @@ def iter_largearray(filename, shape, dtype='float64'):
# .. tip::
#
# Again, if we want to explicitly control how our data will be chunked (compressed etc.)
# in the HDF5 file then we need to wrap our :py:class:`~pynwb.form.data_utils.DataChunkIterator`
# using :py:class:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# in the HDF5 file then we need to wrap our :py:class:`~hdmf.data_utils.DataChunkIterator`
# using :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`
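For example (settings illustrative, with ``data`` being the DataChunkIterator from above):

from hdmf.backends.hdf5.h5_utils import H5DataIO
wrapped = H5DataIO(data=data, chunks=(100, 10), compression='gzip')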

####################
# Discussion
@@ -589,7 +589,7 @@ def iter_largearray(filename, shape, dtype='float64'):
# Step 1: Create a data chunk iterator for our multifile array
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

from pynwb.form.data_utils import AbstractDataChunkIterator, DataChunk # noqa
from hdmf.data_utils import AbstractDataChunkIterator, DataChunk # noqa


class MultiFileArrayIterator(AbstractDataChunkIterator):
@@ -666,16 +666,16 @@ def maxshape(self):
#
# Common mistakes that will result in errors on write:
#
# * The size of a :py:class:`~pynwb.form.data_utils.DataChunk` does not match the selection.
# * The selection for the :py:class:`~pynwb.form.data_utils.DataChunk` is not supported by h5py
# * The size of a :py:class:`~hdmf.data_utils.DataChunk` does not match the selection.
# * The selection for the :py:class:`~hdmf.data_utils.DataChunk` is not supported by h5py
# (e.g., unordered lists etc.)
#
# Other common mistakes:
#
# * Choosing inappropriate chunk sizes. This typically means bad performance with regard to I/O and/or storage cost.
# * Using auto chunking without supplying a good recommended_data_shape. h5py auto chunking can only make a good
# guess of what the chunking should be if it (at least roughly) knows what the shape of the array will be.
# * Trying to wrap a data generator using the default :py:class:`~pynwb.form.data_utils.DataChunkIterator`
# * Trying to wrap a data generator using the default :py:class:`~hdmf.data_utils.DataChunkIterator`
# when the generator does not comply with the assumptions of the default implementation (i.e., yield
# individual, complete elements along the first dimension of the array one-at-a-time). Depending on the generator,
# this may or may not result in an error on write, but the array you are generating will probably end up
10 changes: 5 additions & 5 deletions docs/gallery/general/linking_data.py
@@ -151,9 +151,9 @@
# our TimeSeries, this means that :py:class:`~pynwb.NWBHDF5IO` will need to
# determine on write how to treat the dataset. We can make this explicit and customize this
# behavior on a per-dataset basis by wrapping our dataset using
# :py:meth:`~pynwb.form.backends.hdf5.h5_utils.H5DataIO`
# :py:meth:`~hdmf.backends.hdf5.h5_utils.H5DataIO`

from pynwb.form.backends.hdf5.h5_utils import H5DataIO
from hdmf.backends.hdf5.h5_utils import H5DataIO

# Create another timeseries that links to the same data
test_ts5 = TimeSeries(name='test_timeseries5',
@@ -188,7 +188,7 @@
# ---------------------------
#
# Appending to files and linking is made possible by passing around the same
# :py:class:`~pynwb.form.build.map.BuildManager`. You can get a manager to pass around
# :py:class:`~hdmf.build.map.BuildManager`. You can get a manager to pass around
# using the :py:meth:`~pynwb.get_manager` function.
#
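A sketch of the pattern (the filenames are illustrative):

from pynwb import NWBHDF5IO, get_manager

manager = get_manager()
io1 = NWBHDF5IO('source_data.nwb', mode='r', manager=manager)
nwbfile1 = io1.read()
# open the file that will link to this data with the same manager
io2 = NWBHDF5IO('linking_file.nwb', mode='w', manager=manager)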

@@ -245,5 +245,5 @@
#
# External links are convenient, but to share data we may want to hand a single file with all the
# data to our collaborator rather than having to collect all relevant files. To do this,
# :py:class:`~pynwb.form.backends.hdf5.h5tools.HDF5IO` (and in turn :py:class:`~pynwb.NWBHDF5IO`)
# provides the convenience function :py:func:`~pynwb.form.backends.hdf5.h5tools.HDF5IO.copy_file`
# :py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO` (and in turn :py:class:`~pynwb.NWBHDF5IO`)
# provides the convenience function :py:func:`~hdmf.backends.hdf5.h5tools.HDF5IO.copy_file`
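A sketch of its use, assuming the documented signature (paths are illustrative):

from hdmf.backends.hdf5.h5tools import HDF5IO

HDF5IO.copy_file(source_filename='linked_file.nwb',
                 dest_filename='shareable_file.nwb',
                 expand_external=True)  # resolve external links into actual copies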
2 changes: 1 addition & 1 deletion requirements-dev.txt
@@ -1,4 +1,4 @@
certifi==2018.1.18
certifi==2019.3.9
chardet==3.0.4
codecov==2.0.15
configparser==3.5.0
3 changes: 2 additions & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
certifi==2018.1.18
certifi==2019.3.9
chardet==3.0.4
h5py==2.9.0
idna==2.6
@@ -9,3 +9,4 @@ ruamel.yaml==0.15.85
six==1.11.0
urllib3==1.23
pandas==0.23.4
hdmf==1.0.1