mne-tools · larsoner · Oct 19, 2017 · Jul 21, 2017 · Jul 31, 2017 · Aug 2, 2017
diff --git a/.travis.yml b/.travis.yml
@@ -45,11 +45,11 @@ matrix:
         # 2.7 Old dependencies
         - os: linux
           env: PYTHON_VERSION=2.7
-               CONDA_DEPENDENCIES="numpy=1.8 scipy=0.12 matplotlib=1.3 pandas=0.12 scikit-learn=0.14 nose pytest pytest-cov"
+               CONDA_DEPENDENCIES="numpy=1.8 scipy=0.12 matplotlib=1.3 pandas=0.13 scikit-learn=0.14 nose pytest pytest-cov"
                SPLIT=0
         - os: linux
           env: PYTHON_VERSION=2.7
-               CONDA_DEPENDENCIES="numpy=1.8 scipy=0.12 matplotlib=1.3 pandas=0.12 scikit-learn=0.14 nose pytest pytest-cov"
+               CONDA_DEPENDENCIES="numpy=1.8 scipy=0.12 matplotlib=1.3 pandas=0.13 scikit-learn=0.14 nose pytest pytest-cov"
                SPLIT=1
 
         # Minimal

diff --git a/README.rst b/README.rst
@@ -79,7 +79,7 @@ For full functionality, some functions require:
 
   - scikit-learn >= 0.18
   - nibabel >= 2.1.0
-  - pandas >= 0.12
+  - pandas >= 0.13
 
 To use NVIDIA CUDA for resampling and FFT FIR filtering, you will also need
 to install the NVIDIA CUDA SDK, pycuda, and scikits.cuda. See the

diff --git a/circle.yml b/circle.yml
@@ -27,6 +27,7 @@ dependencies:
     - "~/mne_data/MNE-spm-face"
     - "~/mne_data/MNE-somato-data"
     - "~/mne_data/MNE-brainstorm-data"
+    - "~/mne_data/MNE-kiloword-data"
     - "~/mne_data/MEGSIM"
     - "~/mne_data/MNE-eegbci-data"
     - "~/mne_data/mTRF_1.5"
@@ -131,6 +132,12 @@ dependencies:
             if [[ $(cat $FNAME | grep -x ".*datasets.*misc.*" | wc -l) -gt 0 ]]; then
               python -c "import mne; print(mne.datasets.misc.data_path(update_path=True))";
             fi;
+            if [[ $(cat $FNAME | grep -x ".*datasets.*testing.*" | wc -l) -gt 0 ]]; then
+              python -c "import mne; print(mne.datasets.testing.data_path(update_path=True))";
+            fi;
+            if [[ $(cat $FNAME | grep -x ".*datasets.*kiloword.*" | wc -l) -gt 0 ]]; then
+              python -c "import mne; print(mne.datasets.kiloword.data_path(update_path=True))";
+            fi;
             if [[ $(cat $FNAME | grep -x ".*datasets.*mtrf.*" | wc -l) -gt 0 ]]; then
               python -c "import mne; print(mne.datasets.mtrf.data_path(update_path=True))";
             fi;

diff --git a/doc/conf.py b/doc/conf.py
@@ -286,6 +286,7 @@
     'nibabel': ('http://nipy.org/nibabel', None),
     'nilearn': ('http://nilearn.github.io', None),
     'surfer': ('https://pysurfer.github.io/', None),
+    'pandas': ('https://pandas.pydata.org/pandas-docs/stable', None),
 }
 
 examples_dirs = ['../examples', '../tutorials']

diff --git a/doc/documentation.rst b/doc/documentation.rst
@@ -138,6 +138,7 @@ There are also **examples**, which contain a short use-case to highlight MNE-fun
 
     manual/io.rst
     auto_tutorials/plot_creating_data_structures.rst
+    auto_tutorials/plot_metadata_epochs.rst
     auto_tutorials/plot_modifying_data_inplace.rst
     auto_tutorials/plot_ecog.rst
     manual/memory.rst

diff --git a/doc/manual/datasets_index.rst b/doc/manual/datasets_index.rst
@@ -196,6 +196,14 @@ functions in MNE and does not contain useful metadata for analysis.
       in electrocorticography.
 
 
+Kiloword dataset
+================
+:func:`mne.datasets.kiloword.data_path`.
+
+This dataset consists of averaged EEG data from 75 subjects performing a lexical decision
+task on 960 English words [6]_. The words are richly annotated and can be used, e.g., for
+multiple regression estimation of EEG correlates of printed-word processing.
+
 References
 ==========
 
@@ -205,6 +213,8 @@ References
 
 .. [3] Goldberger AL, Amaral LAN, Glass L, Hausdorff JM, Ivanov PCh, Mark RG, Mietus JE, Moody GB, Peng C-K, Stanley HE. (2000) PhysioBank, PhysioToolkit, and PhysioNet: Components of a New Research Resource for Complex Physiologic Signals. Circulation 101(23):e215-e220
 
-.. [4] Cichy, R. M., Pantazis, D., & Oliva, A. "Resolving human object recognition in space and time." Nature neuroscience (2014): 17(3), 455-462
+.. [4] Cichy, R. M., Pantazis, D., & Oliva, A. Resolving human object recognition in space and time. Nature Neuroscience (2014): 17(3), 455-462
 
 .. [5] Crosse, M. J., Di Liberto, G. M., Bednar, A., & Lalor, E. C. The Multivariate Temporal Response Function (mTRF) Toolbox: A MATLAB Toolbox for Relating Neural Signals to Continuous Stimuli. Frontiers in Human Neuroscience (2016): 10.
+
+.. [6] Dufau, S., Grainger, J., Midgley, K. J., & Holcomb, P. J. A thousand words are worth a picture: Snapshots of printed-word processing in an event-related potential megastudy. Psychological Science (2015)
diff --git a/doc/python_reference.rst b/doc/python_reference.rst
@@ -194,6 +194,7 @@ Datasets
    eegbci.load_data
    fetch_hcp_mmp_parcellation
    hf_sef.data_path
+   kiloword.data_path
    megsim.data_path
    megsim.load_data
    misc.data_path

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -19,7 +19,7 @@ Current
 Changelog
 ~~~~~~~~~
 
-- Nothing yet
+- Add support for metadata in :class:`mne.Epochs` by `Chris Holdgraf`_, `Alex Gramfort`_, `Jona Sassenhagen`_, and `Eric Larson`_
 
 Bug
 ~~~

diff --git a/examples/preprocessing/plot_metadata_query.py b/examples/preprocessing/plot_metadata_query.py
@@ -0,0 +1,108 @@
+"""
+==================================
+Querying epochs with rich metadata
+==================================
+
+Selecting a subset of epochs based on rich metadata.
+
+MNE allows you to include metadata along with your :class:`mne.Epochs` objects.
+This is in the form of a :class:`pandas.DataFrame` that has one row for each
+event, and an arbitrary number of columns corresponding to different
+features that were collected. Columns may be of type int, float, or str.
+
+If an :class:`mne.Epochs` object has a metadata attribute, you can select
+subsets of epochs by using pandas query syntax directly. Here we'll show
+a few examples of how this looks.
+"""
+
+# Authors: Chris Holdgraf <[email protected]>
+#          Jona Sassenhagen <[email protected]>
+#          Eric Larson <[email protected]>
+
+# License: BSD (3-clause)
+
+import os
+import pandas as pd
+import matplotlib.pyplot as plt
+import mne
+
+# First load some data (the sample dataset folder is resolved only once)
+data_dir = os.path.join(mne.datasets.sample.data_path(), 'MEG', 'sample')
+events = mne.read_events(os.path.join(data_dir, 'sample_audvis_raw-eve.fif'))
+raw = mne.io.read_raw_fif(os.path.join(data_dir, 'sample_audvis_raw.fif'))
+
+# We'll create some dummy names for each event type
+event_id = {'Auditory/Left': 1, 'Auditory/Right': 2,
+            'Visual/Left': 3, 'Visual/Right': 4,
+            'smiley': 5, 'button': 32}
+event_id_rev = {val: key for key, val in event_id.items()}
+
+# Split each event name into a kind and a side ('both' when no side is given)
+sides, kinds = [], []
+for ev in events:
+    split = event_id_rev[ev[2]].lower().split('/')
+    if len(split) == 2:
+        kind, side = split
+    else:
+        kind = split[0]
+        side = 'both'
+    kinds.append(kind)
+    sides.append(side)
+
+
+def plot_query_results(query):
+    """Plot the average of the epochs selected by the metadata ``query``."""
+    selected = epochs[query]  # run the metadata query once and reuse it
+    fig = selected.average().plot(show=False)
+    add = 'Query: {}\nNum Epochs: {}'.format(query, len(selected))
+    fig.axes[0].set(title='\n'.join([add, fig.axes[0].get_title()]))
+    plt.show()
+
+
+###############################################################################
+# First we'll create our metadata object. This should be a
+# :class:`pandas.DataFrame` with each row corresponding to an event.
+#
+# .. warning:: The DataFrame index can change during MNE I/O operations, so
+#              do not rely on it to query your metadata.
+
+metadata = {'event_time': events[:, 0] / raw.info['sfreq'],
+            'trial_number': range(len(events)),
+            'kind': kinds,
+            'side': sides}
+metadata = pd.DataFrame(metadata)
+metadata.head()
+
+###############################################################################
+# We can use this metadata object in the construction of an :class:`mne.Epochs`
+# object. The metadata will then exist as an attribute:
+
+epochs = mne.Epochs(raw, events, metadata=metadata, preload=True)
+print(epochs.metadata.head())
+
+###############################################################################
+# You can select rows by passing various queries to the Epochs object. For
+# example, you can select a subset of events based on the value of a column.
+
+query = 'kind == "auditory"'
+plot_query_results(query)
+
+###############################################################################
+# If a column has numeric values, you can also use numeric-style queries:
+
+query = 'trial_number < 10'
+plot_query_results(query)
+
+###############################################################################
+# It is possible to chain these queries together, giving you more expressive
+# ways to select particular epochs:
+
+query = 'trial_number < 10 and side == "left"'
+plot_query_results(query)
+
+###############################################################################
+# Anything accepted by :meth:`pandas.DataFrame.query` can select epochs.
+
+plot_events = ['smiley', 'button']
+query = 'kind in {}'.format(plot_events)
+plot_query_results(query)
diff --git a/mne/datasets/__init__.py b/mne/datasets/__init__.py
@@ -6,6 +6,7 @@
 from . import fieldtrip_cmc
 from . import brainstorm
 from . import visual_92_categories
+from . import kiloword
 from . import eegbci
 from . import hf_sef
 from . import megsim

diff --git a/mne/datasets/kiloword/__init__.py b/mne/datasets/kiloword/__init__.py
@@ -0,0 +1,3 @@
+"""MNE kiloword dataset."""
+
+from .kiloword import data_path, get_version
diff --git a/mne/datasets/kiloword/kiloword.py b/mne/datasets/kiloword/kiloword.py
@@ -0,0 +1,55 @@
+# License: BSD Style.
+
+from ...utils import verbose
+from ..utils import _data_path, _get_version, _version_doc
+
+
+@verbose
+def data_path(path=None, force_update=False, update_path=True, download=True,
+              verbose=None):
+    """Get path to local copy of the kiloword dataset.
+
+    This is the dataset from [1]_.
+
+    Parameters
+    ----------
+    path : None | str
+        Location of where to look for the kiloword dataset. If None, the
+        environment variable or config parameter MNE_DATASETS_KILOWORD_PATH
+        is used. If it doesn't exist, the "mne-python/examples" directory
+        is used. If the kiloword dataset is not found under the given path
+        (e.g., as "mne-python/examples/MNE-kiloword-data"), the data will
+        be automatically downloaded to the specified folder.
+    force_update : bool
+        Force update of the dataset even if a local copy exists.
+    update_path : bool | None
+        If True, set the MNE_DATASETS_KILOWORD_PATH in mne-python
+        config to the given path. If None, the user is prompted.
+    download : bool
+        Forwarded to ``_data_path``; presumably whether to fetch missing data.
+    verbose : bool, str, int, or None
+        If not None, override default verbose level (see mne.verbose).
+
+    Returns
+    -------
+    path : str
+        Path to the kiloword dataset, as returned by ``_data_path``.
+        NOTE(review): presumably a str, not a one-element list -- confirm.
+
+    References
+    ----------
+    .. [1] Dufau, S., Grainger, J., Midgley, KJ., Holcomb, PJ. A thousand
+       words are worth a picture: Snapshots of printed-word processing in an
+       event-related potential megastudy. Psychological Science, 2015
+    """
+    return _data_path(path=path, force_update=force_update,
+                      update_path=update_path, name='kiloword',
+                      download=download)
+
+
+def get_version():
+    """Get the kiloword dataset version (``__doc__`` is replaced below)."""
+    return _get_version('kiloword')
+
+
+get_version.__doc__ = _version_doc.format(name='kiloword')
diff --git a/mne/datasets/utils.py b/mne/datasets/utils.py
@@ -226,6 +226,7 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
         'testing': 'MNE_DATASETS_TESTING_PATH',
         'multimodal': 'MNE_DATASETS_MULTIMODAL_PATH',
         'visual_92_categories': 'MNE_DATASETS_VISUAL_92_CATEGORIES_PATH',
+        'kiloword': 'MNE_DATASETS_KILOWORD_PATH',
         'mtrf': 'MNE_DATASETS_MTRF_PATH',
         'fieldtrip_cmc': 'MNE_DATASETS_FIELDTRIP_CMC_PATH'
     }[name]
@@ -260,14 +261,16 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
         visual_92_categories=[
             'https://osf.io/8ejrs/download',
             'https://osf.io/t4yjp/download'],
-        mtrf="https://superb-dca2.dl.sourceforge.net/project/aespa/"
-             "mTRF_1.5.zip",
+        mtrf='https://superb-dca2.dl.sourceforge.net/project/aespa/'
+             'mTRF_1.5.zip',
+        kiloword='https://osf.io/qkvf9/download',
         fieldtrip_cmc='ftp://ftp.fieldtriptoolbox.org/pub/fieldtrip/'
                       'tutorial/SubjectCMC.zip',
     )
     # filename of the resulting downloaded archive (only needed if the URL
     # name does not match resulting filename)
     archive_names = dict(
+        kiloword='MNE-kiloword-data.tar.gz',
         misc='mne-misc-data-%s.tar.gz' % releases['misc'],
         multimodal='MNE-multimodal-data.tar.gz',
         sample='MNE-sample-data-processed.tar.gz',
@@ -312,6 +315,7 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
         multimodal='26ec847ae9ab80f58f204d09e2c08367',
         visual_92_categories=['74f50bbeb65740903eadc229c9fa759f',
                               '203410a98afc9df9ae8ba9f933370e20'],
+        kiloword='3a124170795abbd2e48aae8727e719a8',
         mtrf='273a390ebbc48da2c3184b01a82e4636',
         fieldtrip_cmc='6f9fd6520f9a66e20994423808d2528c'
     )
@@ -501,7 +505,8 @@ def has_dataset(name):
         'spm': 'MNE-spm-face',
         'multimodal': 'MNE-multimodal-data',
         'testing': 'MNE-testing-data',
-        'visual_92_categories': 'visual_92_categories-data',
+        'visual_92_categories': 'MNE-visual_92_categories-data',
+        'kiloword': 'MNE-kiloword-data',
     }[name]
     archive_name = None
     if name == 'brainstorm':
@@ -518,7 +523,8 @@ def _download_all_example_data(verbose=True):
     # verbose=True by default so we get nice status messages
     # Consider adding datasets from here to CircleCI for PR-auto-build
     from . import (sample, testing, misc, spm_face, somato, brainstorm, megsim,
-                   eegbci, multimodal, hf_sef, mtrf, fieldtrip_cmc)
+                   eegbci, multimodal, hf_sef, mtrf, fieldtrip_cmc,
+                   kiloword)
     sample.data_path()
     testing.data_path()
     misc.data_path()
@@ -528,6 +534,7 @@ def _download_all_example_data(verbose=True):
     multimodal.data_path()
     mtrf.data_path()
     fieldtrip_cmc.data_path()
+    kiloword.data_path()
     sys.argv += ['--accept-brainstorm-license']
     try:
         brainstorm.bst_raw.data_path()

diff --git a/mne/datasets/visual_92_categories/__init__.py b/mne/datasets/visual_92_categories/__init__.py
@@ -1,5 +1,3 @@
 """MNE visual_92_categories dataset."""
 
-from .visual_92_categories import (data_path, has_visual_92_categories_data,
-                                   get_version,
-                                   requires_visual_92_categories_data)
+from .visual_92_categories import data_path, get_version
diff --git a/mne/datasets/visual_92_categories/visual_92_categories.py b/mne/datasets/visual_92_categories/visual_92_categories.py
@@ -1,16 +1,7 @@
 # License: BSD Style.
 
-from functools import partial
-
-import numpy as np
-
-from ...utils import verbose, get_config
-from ..utils import (has_dataset, _data_path, _data_path_doc, _get_version,
-                     _version_doc)
-
-
-has_visual_92_categories_data = partial(has_dataset,
-                                        name='visual_92_categories')
+from ...utils import verbose
+from ..utils import _data_path, _data_path_doc, _get_version, _version_doc
 
 
 @verbose
@@ -56,6 +47,7 @@ def data_path(path=None, force_update=False, update_path=True, download=True,
                       update_path=update_path, name='visual_92_categories',
                       download=download)
 
+
 data_path.__doc__ = _data_path_doc.format(
     name='visual_92_categories', conf='MNE_DATASETS_VISUAL_92_CATEGORIES_PATH')
 
@@ -64,15 +56,5 @@ def get_version():
     """Get dataset version."""
     return _get_version('visual_92_categories')
 
-get_version.__doc__ = _version_doc.format(name='visual_92_categories')
-
 
-# Allow forcing of visual_92_categories dataset skip
-def _skip_visual_92_categories_data():
-    skip_testing = (get_config('MNE_SKIP_VISUAL_92_CATEGORIES_DATASET_TESTS',
-                               'false') == 'true')
-    skip = skip_testing or not has_visual_92_categories_data()
-    return skip
-
-requires_visual_92_categories_data = np.testing.dec.skipif(
-    _skip_visual_92_categories_data, 'Requires visual_92_categories dataset')
+get_version.__doc__ = _version_doc.format(name='visual_92_categories')
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		"""MNE kiloword dataset."""

		from .kiloword import data_path, get_version