Commit

Merge pull request #682 from NeurodataWithoutBorders/enh/epochs_dynamic_table

Enh/epochs dynamic table
ajtritt authored Oct 25, 2018
2 parents 426109b + 769ff89 commit 5bcdc83
Showing 24 changed files with 368 additions and 424 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -9,7 +9,7 @@ references:
       pip install flake8
       flake8 src/pynwb
       flake8 tests
-      flake8 --ignore E402,E501 docs/gallery
+      flake8 --ignore E402,E501,W504 docs/gallery
   initialize-venv: &initialize-venv
     name: Initialize Virtual Environment
2 changes: 1 addition & 1 deletion Makefile
@@ -40,7 +40,7 @@ test:
 flake:
         $(FLAKE) src/
         $(FLAKE) tests/
-        $(FLAKE) --ignore E402 docs/gallery
+        $(FLAKE) --ignore E402,W504 docs/gallery
 
 checkpdb:
         find {src,tests} -name "*.py" -exec grep -Hn pdb {} \;
6 changes: 2 additions & 4 deletions docs/gallery/domain/brain_observatory.py
@@ -107,15 +107,13 @@
     nwbfile.create_epoch(start_time=row.start,
                          stop_time=row.end,
                          timeseries=[running_speed],
-                         tags='trials',
-                         description=str(ri))
+                         tags='trials')
 
 for ri, row in epoch_table.iterrows():
     nwbfile.create_epoch(start_time=row.start,
                          stop_time=row.end,
                          timeseries=[running_speed],
-                         tags='stimulus',
-                         description=row.stimulus)
+                         tags='stimulus')
 
 ########################################
 # 5) In the brain observatory, a two-photon microscope is used to acquire images of the calcium activity of neurons
2 changes: 1 addition & 1 deletion docs/gallery/domain/ophys.py
@@ -75,7 +75,7 @@
 
 image_series = TwoPhotonSeries(name='test_iS', source='Ca2+ imaging example', dimension=[2],
                                external_file=['images.tiff'], imaging_plane=imaging_plane,
-                               starting_frame=[0], format='tiff', timestamps=list())
+                               starting_frame=[0], format='tiff', starting_time=0.0, rate=1.0)
 nwbfile.add_acquisition(image_series)
 
4 changes: 2 additions & 2 deletions docs/gallery/general/file.py
@@ -138,8 +138,8 @@
 # and the fifth argument is a list of all the :py:class:`~pynwb.base.TimeSeries` that the epoch applies
 # to.
 
-nwbfile.create_epoch('the first epoch', 2.0, 4.0, ['first', 'example'], [test_ts, mod_ts])
-nwbfile.create_epoch('the second epoch', 6.0, 8.0, ['second', 'example'], [test_ts, mod_ts])
+nwbfile.create_epoch(2.0, 4.0, ['first', 'example'], [test_ts, mod_ts])
+nwbfile.create_epoch(6.0, 8.0, ['second', 'example'], [test_ts, mod_ts])
 
 ####################
 # .. _basic_trials:
8 changes: 8 additions & 0 deletions docs/source/make_roundtrip_test.rst
@@ -13,6 +13,14 @@ with the following command::
 
    $ python test.py -i
 
+The roundtrip test will generate a new NWB file with the name ``test_<CLASS_NAME>.nwb``, where ``CLASS_NAME`` is
+the class name of the :py:class:`~pynwb.form.container.Container` class you are roundtripping. The test
+will write an NWB file with an instance of the container to disk, read this instance back in, and compare it
+to the instance that was used for writing to disk. Once the test is complete, the NWB file will be deleted.
+You can keep the NWB file around after the test completes by setting the environment variable ``CLEAN_NWB``
+to ``0``, ``false``, ``False``, or ``FALSE``. Setting ``CLEAN_NWB`` to any value not listed here will
+cause the roundtrip NWB file to be deleted once the test has completed.
+
 Before writing tests, we also suggest you familiarize yourself with the
 :ref:`software architecture <software-architecture>` of PyNWB.
 
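For instance, assuming a POSIX shell, a roundtrip run that keeps its ``test_<CLASS_NAME>.nwb`` output around for inspection would be::

   $ CLEAN_NWB=0 python test.py -i
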
117 changes: 74 additions & 43 deletions src/pynwb/core.py
@@ -349,11 +349,21 @@ def add_vector(self, arg):
         self.target.extend(arg)
         self.data.append(len(self.target))
 
-    def __getitem__(self, arg):
+    def __getitem_helper(self, arg):
         start = 0 if arg == 0 else self.data[arg-1]
         end = self.data[arg]
         return self.target[start:end]
+
+    def __getitem__(self, arg):
+        if isinstance(arg, slice):
+            indices = list(range(*arg.indices(len(self.data))))
+            ret = list()
+            for i in indices:
+                ret.append(self.__getitem_helper(i))
+            return ret
+        else:
+            return self.__getitem_helper(arg)
 
 
 @register_class('ElementIdentifiers', CORE_NAMESPACE)
 class ElementIdentifiers(NWBData):
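
The change above splits the old ``__getitem__`` into a helper and adds slice support on top of the same cumulative-offset lookup. A minimal standalone sketch of the scheme, with plain lists standing in for a VectorIndex's ``data`` and its ``target`` vector (the values are invented):

    # 'data' holds cumulative end offsets into the flat 'target' vector
    target = ['a', 'b', 'c', 'd', 'e']
    data = [2, 3, 5]  # row 0 -> target[0:2], row 1 -> target[2:3], row 2 -> target[3:5]

    def getitem_helper(arg):
        start = 0 if arg == 0 else data[arg - 1]
        end = data[arg]
        return target[start:end]

    def getitem(arg):
        if isinstance(arg, slice):
            return [getitem_helper(i) for i in range(*arg.indices(len(data)))]
        return getitem_helper(arg)

    print(getitem(0))            # ['a', 'b']
    print(getitem(slice(1, 3)))  # [['c'], ['d', 'e']]
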
@@ -386,7 +396,7 @@ class NWBTable(NWBData):
     The class attribute __defaultname__ can also be set to specify a default name
     for the table class. If \_\_defaultname\_\_ is not specified, then ``name`` will
     need to be specified when the class is instantiated.
-    '''
+    ''' # noqa: W605
 
     @ExtenderMeta.pre_init
     def __build_table_class(cls, name, bases, classdict):
@@ -847,9 +857,10 @@ def add_row(self, **kwargs):
 class DynamicTable(NWBDataInterface):
     """
     A column-based table. Columns are defined by the argument *columns*. This argument
-    must be a list/tuple of TableColumns or a list/tuple of dicts containing the keys
+    must be a list/tuple of TableColumns and VectorIndexes or a list/tuple of dicts containing the keys
     'name' and 'description' that provide the name and description of each column
-    in the table.
+    in the table. If specifying columns with a list/tuple of dicts, VectorData columns can
+    be specified by setting the key 'vector_data' to True.
     """
 
     __nwbfields__ = (
@@ -881,7 +892,7 @@ def __init__(self, **kwargs):
         if columns is not None:
             if len(columns) > 0:
                 if isinstance(columns[0], dict):
-                    columns = tuple(TableColumn(**d) for d in columns)
+                    columns = self.__build_columns(columns)
                 elif not all(isinstance(c, (VectorData, VectorIndex, TableColumn)) for c in columns):
                     raise ValueError("'columns' must be a list of TableColumns, VectorData, or VectorIndex")
                 lens = [len(c) for c in columns if isinstance(c, (TableColumn, VectorIndex))]
@@ -946,7 +957,38 @@ def __init__(self, **kwargs):
                 col_dict[col.target.name] = col  # use target name for reference and VectorIndex for retrieval
 
         self.__df_cols = [self.id] + [col_dict[name] for name in self.colnames]
-        self.__colids = {name: i for i, name in enumerate(self.colnames)}
+        self.__colids = {name: i+1 for i, name in enumerate(self.colnames)}
+
+    @staticmethod
+    def __build_columns(columns, df=None):
+        tmp = list()
+        for d in columns:
+            name = d['name']
+            desc = d.get('description', 'no description')
+            data = None
+            if df is not None:
+                data = list(df[name].values)
+            if d.get('vector_data', False):
+                index_data = None
+                if data is not None:
+                    index_data = [len(data[0])]
+                    for i in range(1, len(data)):
+                        index_data.append(len(data[i]) + index_data[i-1])
+                    # assume data came in through a DataFrame, so we need
+                    # to concatenate it
+                    tmp_data = list()
+                    for d in data:
+                        tmp_data.extend(d)
+                    data = tmp_data
+                vdata = VectorData(name, data, description=desc)
+                vindex = VectorIndex("%s_index" % name, index_data, target=vdata)
+                tmp.append(vindex)
+                tmp.append(vdata)
+            else:
+                if data is None:
+                    data = list()
+                tmp.append(TableColumn(name, desc, data=data))
+        return tmp
 
     def __len__(self):
         return len(self.id)
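
To make the new ``__build_columns`` concrete: for a ragged column whose rows arrive from a DataFrame as ``[[1, 2], [3], [4, 5, 6]]``, the helper flattens the rows into one VectorData and records cumulative end offsets in a companion VectorIndex. A sketch of just that arithmetic (the data values are invented):

    rows = [[1, 2], [3], [4, 5, 6]]  # one list per table row

    # cumulative end offsets, exactly as computed above
    index_data = [len(rows[0])]
    for i in range(1, len(rows)):
        index_data.append(len(rows[i]) + index_data[i - 1])

    # concatenated VectorData contents
    flat = []
    for row in rows:
        flat.extend(row)

    print(index_data)  # [2, 3, 6] -> row i spans flat[index_data[i-1]:index_data[i]]
    print(flat)        # [1, 2, 3, 4, 5, 6]
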
@@ -981,7 +1023,7 @@ def add_row(self, **kwargs):
         for colname, colnum in self.__colids.items():
             if colname not in data:
                 raise ValueError("column '%s' missing" % colname)
-            c = self.columns[colnum]
+            c = self.__df_cols[colnum]
             if isinstance(c, VectorIndex):
                 c.add_vector(data[colname])
             else:
@@ -1016,7 +1058,7 @@ def add_column(self, **kwargs):
         self.add_child(col)
         if len(data) != len(self.id):
             raise ValueError("column must have the same number of rows as 'id'")
-        self.__colids[name] = len(self.columns)
+        self.__colids[name] = len(self.__df_cols)
         self.fields['colnames'] = tuple(list(self.colnames)+[name])
         self.fields['columns'] = tuple(list(self.columns)+[col])
         self.__df_cols.append(col)
@@ -1056,7 +1098,7 @@ def add_vector_column(self, **kwargs):
         self.add_child(data)
         if len(index) != len(self.id):
             raise ValueError("'index' must have the same number of rows as 'id'")
-        self.__colids[name] = len(self.columns)
+        self.__colids[name] = len(self.__df_cols)
         self.fields['colnames'] = tuple(list(self.colnames)+[name])
         self.fields['columns'] = tuple(list(self.columns)+[index, data])
         self.__df_cols.append(index)
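
A note on the bookkeeping shared by the three hunks above: ``__df_cols`` keeps the ``id`` column at position 0 followed by one retrieval object per column name, so ``__colids`` now maps each name to ``i+1``, and newly added columns are registered at ``len(self.__df_cols)`` rather than ``len(self.columns)`` (which can grow by two entries per ragged column). A toy sketch of the convention (values invented):

    colnames = ('foo', 'bar')
    df_cols = ['id-column', 'foo-column', 'bar-column']        # id occupies index 0
    colids = {name: i + 1 for i, name in enumerate(colnames)}  # {'foo': 1, 'bar': 2}
    assert df_cols[colids['bar']] == 'bar-column'
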
@@ -1088,43 +1130,35 @@ def __getitem__(self, key):
             arg1 = key[0]
             arg2 = key[1]
             if isinstance(arg2, str):
-                arg2 = self.__colids[arg2] + 1
+                arg2 = self.__colids[arg2]
             ret = self.__df_cols[arg2][arg1]
         else:
             arg = key
             if isinstance(arg, str):
                 # index by one string, return column
-                ret = self.__df_cols[self.__colids[arg]+1]
-                # # keeping this around in case anyone wants to resurrect it
-                # dt = self.get_dtype(ret)[1]
-                # ret = np.array(ret.data, dtype=dt)
+                ret = self.__df_cols[self.__colids[arg]]
             elif isinstance(arg, (int, np.int8, np.int16, np.int32, np.int64)):
                 # index by int, return row
                 ret = tuple(col[arg] for col in self.__df_cols)
-                # # keeping this around in case anyone wants to resurrect it
-                # dt = [self.get_dtype(col) for col in self.__df_cols]
-                # ret = np.array([ret], dtype=dt)
-
             elif isinstance(arg, (tuple, list)):
                 # index by a list of ints, return multiple rows
-                # # keeping this around in case anyone wants to resurrect it
-                # dt = [self.get_dtype(col) for col in self.__df_cols]
-                # ret = np.zeros((len(arg),), dtype=dt)
-                # for name, col in zip(self.__df_colnames, self.__df_cols):
-                #     ret[name] = col[arg]
                 ret = list()
                 for i in arg:
                     ret.append(tuple(col[i] for col in self.__df_cols))
-
         return ret
 
     def __contains__(self, val):
         return val in self.__colids
 
     def to_dataframe(self):
         '''Produce a pandas DataFrame containing this table's data.
         '''
 
         data = {}
-        for column in self.columns:
-            data[column.name] = column.data
+        for name in self.colnames:
+            col = self.__df_cols[self.__colids[name]]
+            data[name] = col[:]
 
         return pd.DataFrame(data, index=pd.Index(name=self.id.name, data=self.id.data))
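
With ``__colids`` already offset by one, every lookup above now routes through ``__df_cols`` directly, and ``to_dataframe`` uses the new ``col[:]`` slice path, so ragged columns come back as one list per row. Roughly, for a populated table with an invented column name ``'volume'`` (a sketch, not output from a real session):

    col = table['volume']      # column object (a VectorIndex for ragged columns)
    row = table[0]             # tuple of cells across id and all columns
    rows = table[[0, 2]]       # list of such tuples
    cell = table[0, 'volume']  # one cell, addressed by (row index, column name)
    df = table.to_dataframe()  # ragged columns become one list per row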

@@ -1146,9 +1180,9 @@ def to_dataframe(self):
             'default': ''
         },
         {
-            'name': 'column_descriptions',
-            'type': dict,
-            'help': 'a dictionary mapping column names to descriptions of their contents',
+            'name': 'columns',
+            'type': (list, tuple),
+            'help': 'a list/tuple of dictionaries specifying columns in the table',
             'default': None
         },
         allow_extra=True
@@ -1157,36 +1191,33 @@ def from_dataframe(cls, **kwargs):
         '''Construct an instance of DynamicTable (or a subclass) from a pandas DataFrame. The columns of the resulting
         table are defined by the columns of the dataframe and the index by the dataframe's index (make sure it has a
         name!) or by a column whose name is supplied to the index_column parameter. We recommend that you supply
-        column_descriptions - a dictionary mapping column names to string descriptions - to help others understand
-        the contents of your table.
+        *columns* - a list/tuple of dictionaries containing the name and description of the column - to help others
+        understand the contents of your table. See :py:class:`~pynwb.core.DynamicTable` for more details on *columns*.
         '''
 
         df = kwargs.pop('df')
         name = kwargs.pop('name')
         source = kwargs.pop('source')
         index_column = kwargs.pop('index_column')
         table_description = kwargs.pop('table_description')
-        column_descriptions = kwargs.pop('column_descriptions')
+        columns = kwargs.pop('columns')
 
-        if column_descriptions is None:
-            column_descriptions = {}
+        if columns is None:
+            columns = [{'name': s} for s in df.columns]
+        else:
+            columns = list(columns)
+            existing = set(c['name'] for c in columns)
+            for c in df.columns:
+                if c not in existing:
+                    columns.append({'name': c})
 
         if index_column is not None:
             ids = ElementIdentifiers(name=index_column, data=df[index_column].values.tolist())
         else:
             index_name = df.index.name if df.index.name is not None else 'id'
             ids = ElementIdentifiers(name=index_name, data=df.index.values.tolist())
 
-        columns = []
-        for column_name in df.columns:
-            if index_column is not None and column_name == index_column:
-                continue
-
-            columns.append({
-                'name': column_name,
-                'data': df[column_name].values.tolist(),
-                'description': column_descriptions.get(column_name, '')
-            })
+        columns = cls.__build_columns(columns, df=df)
 
         return cls(name=name, source=source, id=ids, columns=columns, description=table_description, **kwargs)
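
Putting the pieces together, a sketch of the revised ``from_dataframe`` end to end. Everything here is illustrative (the DataFrame contents and the ``trials``, ``condition``, and ``spike_times`` names are invented); the keyword names follow the docval arguments above:

    import pandas as pd

    df = pd.DataFrame({
        'condition': ['a', 'b'],
        'spike_times': [[0.1, 0.2], [0.3]],  # ragged column
    })
    df.index.name = 'id'

    table = DynamicTable.from_dataframe(
        df=df,
        name='trials',
        source='example source',
        table_description='an example trial table',
        columns=[
            {'name': 'condition', 'description': 'stimulus condition'},
            {'name': 'spike_times', 'description': 'per-trial spike times',
             'vector_data': True},
        ])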

(Diff for the remaining 17 changed files not shown.)
