FIX: Read Nihon Kohden annotation file accurately (mne-tools#13251)

myd7349 · larsoner · web-flow · commit 181fea1e7e0b · 2025-10-22T17:02:48.000Z
Co-authored-by: Eric Larson <larson.eric.d@gmail.com>
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -59,6 +59,7 @@ jobs:
       PYTHON_VERSION: '${{ matrix.python }}'
       MKL_NUM_THREADS: '1'
       OPENBLAS_NUM_THREADS: '1'
+      OMP_NUM_THREADS: '1'
       PYTHONUNBUFFERED: '1'
       MNE_CI_KIND: '${{ matrix.kind }}'
       CI_OS_NAME: '${{ matrix.os }}'
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -88,6 +88,7 @@ stages:
         variables:
           DISPLAY: ':99'
           OPENBLAS_NUM_THREADS: '1'
+          OMP_NUM_THREADS: '1'
           MNE_TEST_ALLOW_SKIP: '^.*(PySide6 causes segfaults).*$'
           MNE_BROWSER_PRECOMPUTE: 'false'
         steps:
diff --git a/doc/changes/dev/13251.bugfix.rst b/doc/changes/dev/13251.bugfix.rst
@@ -0,0 +1 @@
+Read Nihon Kohden annotation file accurately (using sublog parsing), by `Tom Ma`_.
diff --git a/mne/datasets/config.py b/mne/datasets/config.py
@@ -87,7 +87,7 @@
 # update the checksum in the MNE_DATASETS dict below, and change version
 # here: ↓↓↓↓↓↓↓↓
 RELEASES = dict(
-    testing="0.161",
+    testing="0.166",
     misc="0.27",
     phantom_kit="0.2",
     ucl_opm_auditory="0.2",
@@ -115,7 +115,7 @@
 # Testing and misc are at the top as they're updated most often
 MNE_DATASETS["testing"] = dict(
     archive_name=f"{TESTING_VERSIONED}.tar.gz",
-    hash="md5:a32cfb9e098dec39a5f3ed6c0833580d",
+    hash="md5:273c5919cf74198a39146e9cbc146ce0",
     url=(
         "https://codeload.github.com/mne-tools/mne-testing-data/"
         f"tar.gz/{RELEASES['testing']}"
diff --git a/mne/io/nihon/nihon.py b/mne/io/nihon/nihon.py
@@ -276,6 +276,45 @@ def _read_nihon_header(fname):
     return header
 
 
+def _read_event_log_block(fid, t_block, version):
+    """Read the raw 45-byte log records of one event-log block.
+
+    Parameters
+    ----------
+    fid : file object
+        Open file object; it is repositioned with ``seek`` and read with
+        ``np.fromfile``.
+    t_block : int
+        Index of the block; selects the 20-byte table entry at
+        ``0x92 + t_block * 20`` that holds the block address.
+    version : str
+        Value that the 16-character field at offset ``0x1`` of the block
+        is compared against.
+
+    Returns
+    -------
+    logs : ndarray of dtype ``|S45`` | None
+        The records read from the block, or ``None`` when the block address
+        cannot be read or is zero, when the 16-character field is missing or
+        differs from ``version``, or when the record count cannot be read.
+    """
+    # Look up the block address in the table (one uint32 per 20-byte entry).
+    fid.seek(0x92 + t_block * 20)
+    data = np.fromfile(fid, np.uint32, 1)
+    # Nothing read (e.g. past end of file) or a zero address: no block.
+    if data.size == 0 or data[0] == 0:
+        return
+    t_blk_address = data[0]
+
+    # The block must carry the expected 16-character identifier.
+    fid.seek(t_blk_address + 0x1)
+    data = np.fromfile(fid, "|S16", 1).astype("U16")
+    if data.size == 0 or data[0] != version:
+        return
+
+    # A single byte at offset 0x12 gives the number of records in the block.
+    fid.seek(t_blk_address + 0x12)
+    data = np.fromfile(fid, np.uint8, 1)
+    if data.size == 0:
+        return
+    n_logs = data[0]
+
+    # The records themselves start at offset 0x14, 45 bytes each.
+    fid.seek(t_blk_address + 0x14)
+    return np.fromfile(fid, "|S45", n_logs)
+
+
+def _parse_event_log(event_log):
+    """Split one event-log record into its description and onset.
+
+    Parameters
+    ----------
+    event_log : bytes
+        A single log record (presumably one ``|S45`` element returned by
+        ``_read_event_log_block`` -- confirm against the caller).
+
+    Returns
+    -------
+    t_desc : bytes
+        The first 20 characters of the record, not yet stripped or decoded.
+    t_onset : int
+        ``hour * 3600 + minute * 60 + second`` computed from the three
+        two-digit fields at positions 20-25 of the record.
+    """
+    t_desc = event_log[:20]
+    # Positions 20-25 hold HHMMSS as text digits; int() raises ValueError
+    # if a field is not numeric.
+    hour, minute, second = (
+        int(event_log[20:22]),
+        int(event_log[22:24]),
+        int(event_log[24:26]),
+    )
+    t_onset = hour * 3600 + minute * 60 + second
+    return t_desc, t_onset
+
+
+def _parse_sub_event_log(sub_event_log):
+    """Split one sub-event-log record into its description and onset offset.
+
+    Parameters
+    ----------
+    sub_event_log : bytes
+        A single sub-log record (presumably one ``|S45`` element returned by
+        ``_read_event_log_block`` -- confirm against the caller).
+
+    Returns
+    -------
+    t_sub_desc : bytes
+        The first 20 characters of the record, not yet stripped or decoded.
+    t_sub_onset : float
+        The six-digit field at positions 24-29 divided by ``1e6``
+        (presumably microseconds converted to a fraction of a second --
+        TODO confirm against the file format).
+    """
+    t_sub_desc = sub_event_log[:20]
+    # int() raises ValueError if the six-character field is not numeric.
+    t_sub_onset = int(sub_event_log[24:30]) / 1e6
+    return t_sub_desc, t_sub_onset
+
+
 def _read_nihon_annotations(fname):
     fname = _ensure_path(fname)
     log_fname = fname.with_suffix(".LOG")
@@ -292,27 +331,32 @@ def _read_nihon_annotations(fname):
         n_logblocks = np.fromfile(fid, np.uint8, 1)[0]
         all_onsets = []
         all_descriptions = []
+        may_have_sub_blocks = n_logblocks <= 21
         for t_block in range(n_logblocks):
-            fid.seek(0x92 + t_block * 20)
-            t_blk_address = np.fromfile(fid, np.uint32, 1)[0]
-            fid.seek(t_blk_address + 0x12)
-            n_logs = np.fromfile(fid, np.uint8, 1)[0]
-            fid.seek(t_blk_address + 0x14)
-            t_logs = np.fromfile(fid, "|S45", n_logs)
-            for t_log in t_logs:
+            t_logs = _read_event_log_block(fid, t_block, version)
+            t_sub_logs = None
+            if may_have_sub_blocks:
+                t_sub_logs = _read_event_log_block(fid, t_block + 22, version)
+
+            for li, t_log in enumerate(t_logs):
+                t_desc, t_onset = _parse_event_log(t_log)
+                if t_sub_logs is not None and t_sub_logs.size == t_logs.size:
+                    t_sub_desc, t_sub_onset = _parse_sub_event_log(t_sub_logs[li])
+                    t_desc += t_sub_desc
+                    t_onset += t_sub_onset
+
+                t_desc = t_desc.rstrip(b"\x00")
                 for enc in _encodings:
                     try:
-                        t_log = t_log.decode(enc)
+                        t_desc = t_desc.decode(enc)
                     except UnicodeDecodeError:
                         pass
                     else:
                         break
                 else:
                     warn(f"Could not decode log as one of {_encodings}")
                     continue
-                t_desc = t_log[:20].strip("\x00")
-                t_onset = datetime.strptime(t_log[20:26], "%H%M%S")
-                t_onset = t_onset.hour * 3600 + t_onset.minute * 60 + t_onset.second
+
                 all_onsets.append(t_onset)
                 all_descriptions.append(t_desc)
 
diff --git a/mne/io/nihon/tests/test_nihon.py b/mne/io/nihon/tests/test_nihon.py
@@ -3,7 +3,7 @@
 # Copyright the MNE-Python contributors.
 
 import pytest
-from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_allclose
 
 from mne.datasets import testing
 from mne.io import read_raw_edf, read_raw_nihon
@@ -27,24 +27,22 @@ def test_nihon_eeg():
     _test_raw_reader(read_raw_nihon, fname=fname, test_scaling=False)
     fname_edf = data_path / "NihonKohden" / "MB0400FU.EDF"
     raw_edf = read_raw_edf(fname_edf, preload=True)
+    raw_edf.drop_channels(["Events/Markers"])
 
     assert raw._data.shape == raw_edf._data.shape
     assert raw.info["sfreq"] == raw.info["sfreq"]
-    # ch names and order are switched in the EDF
-    edf_ch_names = {x: x.split(" ")[1].replace("-Ref", "") for x in raw_edf.ch_names}
+    # a couple of ch names differ in the EDF
+    edf_ch_names = {"EEG Mark1": "$A2", "EEG Mark2": "$A1"}
     raw_edf.rename_channels(edf_ch_names)
     assert raw.ch_names == raw_edf.ch_names
 
-    for i, an1 in enumerate(raw.annotations):
-        # EDF has some weird annotations, which are not in the LOG file
-        an2 = raw_edf.annotations[i * 2 + 1]
+    assert len(raw.annotations) == len(raw_edf.annotations)
+    for an1, an2 in zip(raw.annotations, raw_edf.annotations):
         assert an1["onset"] == an2["onset"]
         assert an1["duration"] == an2["duration"]
-        # Also, it prepends 'Segment: ' to some annotations
-        t_desc = an2["description"].replace("Segment: ", "")
-        assert an1["description"] == t_desc
+        assert an1["description"] == an2["description"].rstrip()
 
-    assert_array_almost_equal(raw._data, raw_edf._data)
+    assert_allclose(raw.get_data(), raw_edf.get_data())
 
     with pytest.raises(ValueError, match="Not a valid Nihon Kohden EEG file"):
         raw = read_raw_nihon(fname_edf, preload=True)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Read Nihon Kohden annotation file accurately (using sublog parsing), by `Tom Ma`_.