Skip to content

Commit 88d36f3

Browse files
authored
Merge branch 'dev' into add/aligned_dynamic_table
2 parents 1ee6410 + 9d85db1 commit 88d36f3

File tree

6 files changed

+276
-28
lines changed

6 files changed

+276
-28
lines changed

CHANGELOG.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
- Add experimental namespace to HDMF common schema. New data types should go in the experimental namespace
1212
(hdmf-experimental) prior to being added to the core (hdmf-common) namespace. The purpose of this is to provide
1313
a place to test new data types that may break backward compatibility as they are refined. @ajtritt (#545)
14-
1514
- Add `EnumData` type for storing data that comes from a fixed set of values. This replaces `VocabData` i.e.
1615
`VocabData` has been removed. `VocabData` stored vocabulary elements in an attribute, which has a size limit.
1716
`EnumData` now stores elements in a separate dataset, referenced by an attribute stored on the `EnumData` dataset.
@@ -20,14 +19,19 @@
2019
Each sub-table is itself a DynamicTable that is aligned with the main table by row index. Each subtable
2120
defines a sub-category in the main table effectively creating a table with sub-headings to organize columns.
2221
@oruebel (#551)
22+
- Equality check for `DynamicTable` now also checks that the name and description of the table are the same. @rly (#566)
2323

2424
### Internal improvements
2525
- Update CI and copyright year. @rly (#523, #524)
26+
- Equality check for `DynamicTable` returns False if the other object is not a `DynamicTable` instead of raising an error.
27+
@rly (#566)
2628

2729
### Bug fixes
2830
- Fix CI testing on Python 3.9. @rly (#523)
2931
- Fix certain edge cases where `GroupValidator` would not validate all of the child groups or datasets
3032
attached to a `GroupBuilder`. @dsleiter (#526)
33+
- Fix generation of classes that extend both `MultiContainerInterface` and another class that extends
34+
`MultiContainerInterface`. @rly (#567)
3135

3236
## HDMF 2.4.0 (February 23, 2021)
3337

docs/gallery/dynamictable.py

+167-14
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
# The :py:class:`~hdmf.common.table.DynamicTable` class represents a column-based table
1313
# to which you can add custom columns. It consists of a name, a description, a list of
1414
# row IDs, and a list of columns. Columns are represented by
15-
# :py:class:`~hdmf.common.table.VectorData` and :py:class:`~hdmf.common.table.VectorIndex`
16-
# objects.
15+
# :py:class:`~hdmf.common.table.VectorData`, :py:class:`~hdmf.common.table.VectorIndex`,
16+
# and :py:class:`~hdmf.common.table.DynamicTableRegion` objects.
1717

1818
###############################################################################
1919
# Constructing a table
@@ -32,7 +32,7 @@
3232
###############################################################################
3333
# Initializing columns
3434
# --------------------
35-
# You can initialize a :py:class:`~hdmf.common.table.DynamicTable` with particular
35+
# You can create a :py:class:`~hdmf.common.table.DynamicTable` with particular
3636
# columns by passing a list or tuple of
3737
# :py:class:`~hdmf.common.table.VectorData` objects for the ``columns`` argument
3838
# in the constructor.
@@ -73,10 +73,25 @@
7373
id=[100, 200],
7474
)
7575

76+
###############################################################################
77+
# If a list of integers is passed to ``id``,
78+
# :py:class:`~hdmf.common.table.DynamicTable` automatically creates
79+
# an :py:class:`~hdmf.common.table.ElementIdentifiers` object, which is the data type
80+
# that stores row IDs. The above command is equivalent to
81+
82+
from hdmf.common.table import ElementIdentifiers
83+
84+
table_set_ids = DynamicTable(
85+
name='my table',
86+
description='an example table',
87+
columns=[col1, col2],
88+
id=ElementIdentifiers(name='id', data=[100, 200]),
89+
)
90+
7691
###############################################################################
7792
# Adding rows
7893
# -----------
79-
# You can add rows to a :py:class:`~hdmf.common.table.DynamicTable` using
94+
# You can also add rows to a :py:class:`~hdmf.common.table.DynamicTable` using
8095
# :py:meth:`DynamicTable.add_row <hdmf.common.table.DynamicTable.add_row>`.
8196
# A keyword argument for every column in the table must be supplied.
8297

@@ -86,10 +101,9 @@
86101
)
87102

88103
###############################################################################
89-
# You can also supply an optional row ID to
104+
# You can supply an optional row ID to
90105
# :py:meth:`DynamicTable.add_row <hdmf.common.table.DynamicTable.add_row>`.
91-
# If no ID is supplied, the ID is automatically set to the number of rows in the table
92-
# prior to adding the new row (i.e., automatic IDs start at 0).
106+
# If no ID is supplied, the automatic row IDs count up from 0.
93107

94108
table.add_row(
95109
col1=4,
@@ -118,9 +132,35 @@
118132
data=[True, True, False, True], # specify data for the 4 rows in the table
119133
)
120134

135+
###############################################################################
136+
# Enumerated Data
137+
# ---------------
138+
# :py:class:`~hdmf.common.table.EnumData` is a special type of column for storing
139+
# an enumerated data type. This way each unique value is stored once, and the data
140+
# references those values by index. Using this method is more efficient than storing
141+
# a single value many times, and has the advantage of communicating to downstream
142+
# tools that the data is categorical in nature.
143+
144+
from hdmf.common.table import EnumData
145+
146+
# this column has a length of 5, not 3
147+
enum_col = EnumData(
148+
name="cell_type",
149+
description="this column holds categorical variables",
150+
data=[0, 1, 2, 1, 0],
151+
elements=["aa", "bb", "cc"]
152+
)
153+
154+
my_table = DynamicTable(
155+
name='my table',
156+
description='an example table',
157+
columns=[enum_col],
158+
)
159+
160+
121161
###############################################################################
122162
# Ragged array columns
123-
# ^^^^^^^^^^^^^^^^^^^^
163+
# --------------------
124164
# A table column with a different number of elements for each row is called a
125165
# ragged array. To initialize a :py:class:`~hdmf.common.table.DynamicTable`
126166
# with a ragged array column, pass both
@@ -150,9 +190,10 @@
150190
####################################################################################
151191
# VectorIndex.data provides the indices for how to break VectorData.data into cells
152192
#
153-
# You can add a ragged array column to an existing
193+
# You can add an empty ragged array column to an existing
154194
# :py:class:`~hdmf.common.table.DynamicTable` by specifying ``index=True``
155195
# to :py:meth:`DynamicTable.add_column <hdmf.common.table.DynamicTable.add_column>`.
196+
# This method only works if run before any rows have been added to the table.
156197

157198
new_table = DynamicTable(
158199
name='my table',
@@ -179,6 +220,123 @@
179220
)
180221

181222
###############################################################################
223+
# Referencing rows of other tables
224+
# --------------------------------
225+
# You can create a column that references rows of another table by adding a
226+
# :py:class:`~hdmf.common.table.DynamicTableRegion` object as a column of your
227+
# :py:class:`~hdmf.common.table.DynamicTable`. This is analogous to
228+
# a foreign key in a relational database.
229+
230+
from hdmf.common.table import DynamicTableRegion
231+
232+
dtr_col = DynamicTableRegion(
233+
name='table1_ref',
234+
description='references rows of earlier table',
235+
data=[0, 1, 0, 0],
236+
table=table
237+
)
238+
239+
data_col = VectorData(
240+
name='col2',
241+
description='column #2',
242+
data=['a', 'a', 'a', 'b'],
243+
)
244+
245+
table2 = DynamicTable(
246+
name='my table',
247+
description='an example table',
248+
columns=[dtr_col, data_col],
249+
)
250+
251+
###############################################################################
252+
# Here, the ``data`` of ``dtr_col`` maps to rows of ``table`` (0-indexed).
253+
#
254+
# .. note::
255+
# The ``data`` values of :py:class:`~hdmf.common.table.DynamicTableRegion` map to the row
256+
# index, not the row ID, though if you are using default IDs, these values will be the
257+
# same.
258+
#
259+
# Reference more than one row of another table with a
260+
# :py:class:`~hdmf.common.table.DynamicTableRegion` indexed by a
261+
# :py:class:`~hdmf.common.table.VectorIndex`.
262+
263+
indexed_dtr_col = DynamicTableRegion(
264+
name='table1_ref2',
265+
description='references multiple rows of earlier table',
266+
data=[0, 0, 1, 1, 0, 0, 1],
267+
table=table
268+
)
269+
270+
dtr_idx = VectorIndex(
271+
name='table1_ref2_index',
272+
target=indexed_dtr_col,
273+
data=[2, 3, 5, 7],
274+
)
275+
276+
table3 = DynamicTable(
277+
name='my table',
278+
description='an example table',
279+
columns=[dtr_idx, indexed_dtr_col],
280+
)
281+
282+
###############################################################################
283+
# Creating an expandable table
284+
# ----------------------------
285+
# When using the default HDF5 backend, each column of these tables is an HDF5 Dataset,
286+
# which by default is fixed in size. This means that once a file is written, it is not
287+
# possible to add a new row. If you want to be able to save this file, load it, and add
288+
# more rows to the table, you will need to set this up when you create the
289+
# :py:class:`~hdmf.common.table.DynamicTable`. You do this by wrapping the data with
290+
# :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
291+
292+
from hdmf.backends.hdf5.h5_utils import H5DataIO
293+
294+
col1 = VectorData(
295+
name='expandable col1',
296+
description='column #1',
297+
data=H5DataIO(data=[1, 2], maxshape=(None,)),
298+
)
299+
col2 = VectorData(
300+
name='expandable col2',
301+
description='column #2',
302+
data=H5DataIO(data=['a', 'b'], maxshape=(None,)),
303+
)
304+
305+
# Don't forget to wrap the row IDs too!
306+
ids = ElementIdentifiers(
307+
name='id',
308+
data=H5DataIO(
309+
data=[0, 1],
310+
maxshape=(None,)
311+
)
312+
)
313+
314+
expandable_table = DynamicTable(
315+
name='table that can be expanded after being saved to file',
316+
description='an example table',
317+
columns=[col1, col2],
318+
id=ids,
319+
)
320+
321+
###############################################################################
322+
# Now you can write the file, read it back, and run ``expandable_table.add_row()``.
323+
# In this example, we are setting ``maxshape`` to ``(None,)``, which means this is a
324+
# 1-dimensional matrix that can expand indefinitely along its single dimension. You
325+
# could also use an integer in place of ``None``. For instance, ``maxshape=(8,)`` would
326+
# allow the column to grow up to a length of 8. Whichever ``maxshape`` you choose,
327+
# it should be the same for all :py:class:`~hdmf.common.table.VectorData`,
328+
# :py:class:`~hdmf.common.table.ElementIdentifiers`, and
329+
# :py:class:`~hdmf.common.table.DynamicTableRegion` objects in the
330+
# :py:class:`~hdmf.common.table.DynamicTable`, since they must always be the same
331+
# length. The default :py:class:`~hdmf.common.table.ElementIdentifiers` automatically
332+
# generated when you pass a list of integers to the ``id`` argument of the
333+
# :py:class:`~hdmf.common.table.DynamicTable` constructor is not expandable, so do not
334+
# forget to create a :py:class:`~hdmf.common.table.ElementIdentifiers` object, and wrap
335+
# that data as well. If any of the columns are indexed, the ``data`` arg of
336+
# :py:class:`~hdmf.common.table.VectorIndex` will also need to be wrapped in
337+
# :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
338+
#
339+
#
182340
# Converting the table to a pandas ``DataFrame``
183341
# ----------------------------------------------
184342
# `pandas`_ is a popular data analysis tool, especially for working with tabular data.
@@ -466,11 +624,6 @@
466624
table_double_ragged_col['col6'] # returns col6_ind_ind
467625
table_double_ragged_col.col6 # returns col6
468626

469-
###############################################################################
470-
# Referencing rows of a DynamicTable
471-
# ----------------------------------
472-
# TODO
473-
474627
###############################################################################
475628
# Creating custom DynamicTable subclasses
476629
# ---------------------------------------

src/hdmf/build/classgenerator.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -339,4 +339,9 @@ def post_process(cls, classdict, bases, docval_args, spec):
339339
:param spec: The spec for the container class to generate.
340340
"""
341341
if '__clsconf__' in classdict:
342-
bases.insert(0, MultiContainerInterface)
342+
# do not add MCI as a base if a base is already a subclass of MultiContainerInterface
343+
for b in bases:
344+
if issubclass(b, MultiContainerInterface):
345+
break
346+
else:
347+
bases.insert(0, MultiContainerInterface)

src/hdmf/common/table.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -583,18 +583,22 @@ def add_row(self, **kwargs):
583583
c.add_row(data[colname])
584584

585585
def __eq__(self, other):
586-
"""
587-
Compare if the two DynamicTables contain the same data
586+
"""Compare if the two DynamicTables contain the same data.
588587
589-
This implemented by converting the DynamicTables to a pandas dataframe and
590-
comparing the equality of the two tables.
588+
First this returns False if the other DynamicTable has a different name or
589+
description. Then, this table and the other table are converted to pandas
590+
dataframes and the equality of the two tables is returned.
591591
592592
:param other: DynamicTable to compare to
593593
594-
:raises: An error will be raised with to_dataframe is not defined or other
595-
596594
:return: Bool indicating whether the two DynamicTables contain the same data
597595
"""
596+
if other is self:
597+
return True
598+
if not isinstance(other, DynamicTable):
599+
return False
600+
if self.name != other.name or self.description != other.description:
601+
return False
598602
return self.to_dataframe().equals(other.to_dataframe())
599603

600604
@docval({'name': 'name', 'type': str, 'doc': 'the name of this VectorData'}, # noqa: C901
@@ -1176,12 +1180,12 @@ class EnumData(VectorData):
11761180

11771181
__fields__ = ('elements', )
11781182

1179-
@docval({'name': 'name', 'type': str, 'doc': 'the name of this VectorData'},
1183+
@docval({'name': 'name', 'type': str, 'doc': 'the name of this column'},
11801184
{'name': 'description', 'type': str, 'doc': 'a description for this column'},
11811185
{'name': 'data', 'type': ('array_data', 'data'),
1182-
'doc': 'a dataset where the first dimension is a concatenation of multiple vectors', 'default': list()},
1186+
'doc': 'integers that index into elements for the value of each row', 'default': list()},
11831187
{'name': 'elements', 'type': ('array_data', 'data', VectorData), 'default': list(),
1184-
'doc': 'the items in this elements'})
1188+
'doc': 'lookup values for each integer in ``data``'})
11851189
def __init__(self, **kwargs):
11861190
elements = popargs('elements', kwargs)
11871191
super().__init__(**kwargs)

tests/unit/build_tests/test_classgenerator.py

+28-3
Original file line numberDiff line numberDiff line change
@@ -828,9 +828,6 @@ def test_post_process(self):
828828
groups=[
829829
GroupSpec(data_type_inc='EmptyBar', doc='test multi', quantity='*')
830830
],
831-
attributes=[
832-
AttributeSpec(name='attr3', doc='a float attribute', dtype='float')
833-
]
834831
)
835832
classdict = dict(
836833
__clsconf__=[
@@ -847,3 +844,31 @@ def test_post_process(self):
847844
docval_args = []
848845
MCIClassGenerator.post_process(classdict, bases, docval_args, multi_spec)
849846
self.assertEqual(bases, [MultiContainerInterface, Container])
847+
848+
def test_post_process_already_multi(self):
849+
class Multi1(MultiContainerInterface):
850+
pass
851+
852+
multi_spec = GroupSpec(
853+
doc='A test extension that contains a multi and extends a multi',
854+
data_type_def='Multi2',
855+
data_type_inc='Multi1',
856+
groups=[
857+
GroupSpec(data_type_inc='EmptyBar', doc='test multi', quantity='*')
858+
],
859+
)
860+
classdict = dict(
861+
__clsconf__=[
862+
dict(
863+
attr='empty_bars',
864+
type=EmptyBar,
865+
add='add_empty_bars',
866+
get='get_empty_bars',
867+
create='create_empty_bars'
868+
)
869+
]
870+
)
871+
bases = [Multi1]
872+
docval_args = []
873+
MCIClassGenerator.post_process(classdict, bases, docval_args, multi_spec)
874+
self.assertEqual(bases, [Multi1])

0 commit comments

Comments
 (0)