From f59bde19defa97801817b1f0de3d389634589e65 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 20 Sep 2022 11:47:06 -0400 Subject: [PATCH 1/5] integrated cached namespaces --- .github/workflows/dev_gallery.yml | 35 ++++++++++++++ nwbinspector/nwbinspector.py | 78 +++++++++++++++++++++++++++---- 2 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/dev_gallery.yml diff --git a/.github/workflows/dev_gallery.yml b/.github/workflows/dev_gallery.yml new file mode 100644 index 000000000..8b1e0798f --- /dev/null +++ b/.github/workflows/dev_gallery.yml @@ -0,0 +1,35 @@ +name: Development Version Gallery +on: + schedule: + - cron: "0 0 * * *" # daily + pull_request: + +jobs: + build-and-test: + name: Testing against past PyNWB versions + runs-on: ubuntu-latest + strategy: + fail-fast: false + steps: + - uses: s-weigand/setup-conda@v1 + with: + update-conda: true + python-version: 3.9 + conda-channels: conda-forge + - uses: actions/checkout@v2 + - run: git fetch --prune --unshallow --tags + - name: Install pytest + run: | + pip install pytest + pip install pytest-cov + - name: Install package + run: | + pip install -e . + pip install git+https://github.com/neurodatawithoutborders/pynwb@dev + pip install dandi + - name: Uninstall h5py + run: pip uninstall -y h5py + - name: Install ROS3 + run: conda install -c conda-forge h5py + - name: Run pytest with coverage + run: pytest -rsx diff --git a/nwbinspector/nwbinspector.py b/nwbinspector/nwbinspector.py index d496a9465..562aee1c6 100644 --- a/nwbinspector/nwbinspector.py +++ b/nwbinspector/nwbinspector.py @@ -20,6 +20,7 @@ import yaml from tqdm import tqdm from natsort import natsorted +from packaging.version import Version from . import available_checks from .inspector_tools import ( @@ -30,7 +31,14 @@ ) from .register_checks import InspectorMessage, Importance from .tools import get_s3_urls_and_dandi_paths -from .utils import FilePathType, PathType, OptionalListOfStrings, robust_s3_read, calculate_number_of_cpu +from .utils import ( + FilePathType, + PathType, + OptionalListOfStrings, + robust_s3_read, + calculate_number_of_cpu, + get_package_version, +) INTERNAL_CONFIGS = dict(dandi=Path(__file__).parent / "internal_configs" / "dandi.inspector_config.yaml") @@ -231,6 +239,7 @@ def inspect_all_cli( json_file_path: Optional[str] = None, n_jobs: int = 1, skip_validate: bool = False, + use_cached_namespaces: Optional[str] = None, detailed: bool = False, progress_bar: Optional[str] = None, stream: bool = False, @@ -271,6 +280,7 @@ def inspect_all_cli( config=config, n_jobs=n_jobs, skip_validate=skip_validate, + use_cached_namespaces=strtobool(use_cached_namespaces) if use_cached_namespaces is not None else None, progress_bar=progress_bar, stream=stream, version_id=version_id, @@ -299,6 +309,7 @@ def inspect_all( importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION, n_jobs: int = 1, skip_validate: bool = False, + use_cached_namespaces: Optional[str] = None, progress_bar: bool = True, progress_bar_options: Optional[dict] = None, stream: bool = False, @@ -341,6 +352,10 @@ def inspect_all( skip_validate : bool, optional Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10). The default is False, which is also recommended. + use_cached_namespaces : bool + If not skipping validation, this determines whether to use global namespaces or those cached within the file. + Only available when using PyNWB version 2.1.0 or greater. + Defaults to True. progress_bar : bool, optional Display a progress bar while scanning NWBFiles. Defaults to True. @@ -389,6 +404,8 @@ def inspect_all( # Manual identifier check over all files in the folder path identifiers = defaultdict(list) + filterwarnings(action="ignore", message="No cached namespaces found in .*") + filterwarnings(action="ignore", message="Ignoring cached namespace .*") for nwbfile_path in nwbfiles: with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io: nwbfile = robust_s3_read(io.read) @@ -426,6 +443,7 @@ def inspect_all( nwbfile_path=nwbfile_path, checks=checks, skip_validate=skip_validate, + use_cached_namespaces=use_cached_namespaces, driver=driver, ) ) @@ -439,17 +457,35 @@ def inspect_all( yield message else: for nwbfile_path in nwbfiles_iterable: - for message in inspect_nwb(nwbfile_path=nwbfile_path, checks=checks, driver=driver): + for message in inspect_nwb( + nwbfile_path=nwbfile_path, + checks=checks, + skip_validate=skip_validate, + use_cached_namespaces=use_cached_namespaces, + driver=driver, + ): if stream: message.file_path = nwbfiles[message.file_path] yield message def _pickle_inspect_nwb( - nwbfile_path: str, checks: list = available_checks, skip_validate: bool = False, driver: Optional[str] = None + nwbfile_path: str, + checks: list = available_checks, + skip_validate: bool = False, + use_cached_namespaces: Optional[str] = None, + driver: Optional[str] = None, ): """Auxiliary function for inspect_all to run in parallel using the ProcessPoolExecutor.""" - return list(inspect_nwb(nwbfile_path=nwbfile_path, checks=checks, skip_validate=skip_validate, driver=driver)) + return list( + inspect_nwb( + nwbfile_path=nwbfile_path, + checks=checks, + skip_validate=skip_validate, + use_cached_namespaces=use_cached_namespaces, + driver=driver, + ) + ) def inspect_nwb( @@ -461,6 +497,7 @@ def inspect_nwb( importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION, driver: Optional[str] = None, skip_validate: bool = False, + use_cached_namespaces: Optional[bool] = None, max_retries: int = 10, ) -> List[InspectorMessage]: """ @@ -495,6 +532,10 @@ def inspect_nwb( skip_validate : bool Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10). The default is False, which is also recommended. + use_cached_namespaces : bool + If not skipping validation, this determines whether to use global namespaces or those cached within the file. + Only available when using PyNWB version 2.1.0 or greater. + Defaults to True. max_retries : int, optional When using the ros3 driver to stream data from an s3 path, occasional curl issues can result. AWS suggests using iterative retry with an exponential backoff of 0.1 * 2^retries. @@ -509,12 +550,15 @@ def inspect_nwb( checks=checks, config=config, ignore=ignore, select=select, importance_threshold=importance_threshold ) nwbfile_path = str(nwbfile_path) + filterwarnings(action="ignore", message="No cached namespaces found in .*") filterwarnings(action="ignore", message="Ignoring cached namespace .*") - - with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io: - if not skip_validate: - validation_errors = pynwb.validate(io=io) + if not skip_validate: + # For back-compatability, the original io usage does not use cached namespaces + if get_package_version(name="pynwb") > Version("2.1.0"): + validation_errors, _ = pynwb.validate( + paths=[nwbfile_path], use_cached_namespaces=use_cached_namespaces or True + ) for validation_error in validation_errors: yield InspectorMessage( message=validation_error.reason, @@ -523,7 +567,23 @@ def inspect_nwb( location=validation_error.location, file_path=nwbfile_path, ) - + else: # pragma: no cover + if use_cached_namespaces is not None: + warn( + "Validation option 'use_cached_namespaces' was specified, " + "but the system does not have PyNWB>=2.1.0 - using global namespaces." + ) + with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io: + validation_errors, _ = pynwb.validate(io=io) + for validation_error in validation_errors: + yield InspectorMessage( + message=validation_error.reason, + importance=Importance.PYNWB_VALIDATION, + check_function_name=validation_error.name, + location=validation_error.location, + file_path=nwbfile_path, + ) + with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io: try: nwbfile = robust_s3_read(command=io.read, max_retries=max_retries) for inspector_message in run_checks(nwbfile=nwbfile, checks=checks): From 09c9275b3a2bea3273a42da1aeccf0a032486b58 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 20 Sep 2022 12:02:48 -0400 Subject: [PATCH 2/5] debug --- nwbinspector/nwbinspector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nwbinspector/nwbinspector.py b/nwbinspector/nwbinspector.py index 562aee1c6..d79e048de 100644 --- a/nwbinspector/nwbinspector.py +++ b/nwbinspector/nwbinspector.py @@ -574,7 +574,7 @@ def inspect_nwb( "but the system does not have PyNWB>=2.1.0 - using global namespaces." ) with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io: - validation_errors, _ = pynwb.validate(io=io) + validation_errors = pynwb.validate(io=io) for validation_error in validation_errors: yield InspectorMessage( message=validation_error.reason, From 0394291b831728ee34980e07bfb1bc550ad5c3ac Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Wed, 9 Nov 2022 12:43:21 -0500 Subject: [PATCH 3/5] Apply suggestions from code review --- nwbinspector/nwbinspector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nwbinspector/nwbinspector.py b/nwbinspector/nwbinspector.py index d79e048de..ff270c0b5 100644 --- a/nwbinspector/nwbinspector.py +++ b/nwbinspector/nwbinspector.py @@ -555,7 +555,7 @@ def inspect_nwb( filterwarnings(action="ignore", message="Ignoring cached namespace .*") if not skip_validate: # For back-compatability, the original io usage does not use cached namespaces - if get_package_version(name="pynwb") > Version("2.1.0"): + if get_package_version(name="pynwb") >= Version("2.2.0"): validation_errors, _ = pynwb.validate( paths=[nwbfile_path], use_cached_namespaces=use_cached_namespaces or True ) @@ -571,7 +571,7 @@ def inspect_nwb( if use_cached_namespaces is not None: warn( "Validation option 'use_cached_namespaces' was specified, " - "but the system does not have PyNWB>=2.1.0 - using global namespaces." + "but the system does not have PyNWB>=2.2.0 - using only the global namespaces." ) with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io: validation_errors = pynwb.validate(io=io) From cebcbeb3d28404201b5c65ce80bf1c79b13e1da0 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Wed, 9 Nov 2022 12:45:27 -0500 Subject: [PATCH 4/5] Delete dev_gallery.yml --- .github/workflows/dev_gallery.yml | 35 ------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 .github/workflows/dev_gallery.yml diff --git a/.github/workflows/dev_gallery.yml b/.github/workflows/dev_gallery.yml deleted file mode 100644 index 8b1e0798f..000000000 --- a/.github/workflows/dev_gallery.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Development Version Gallery -on: - schedule: - - cron: "0 0 * * *" # daily - pull_request: - -jobs: - build-and-test: - name: Testing against past PyNWB versions - runs-on: ubuntu-latest - strategy: - fail-fast: false - steps: - - uses: s-weigand/setup-conda@v1 - with: - update-conda: true - python-version: 3.9 - conda-channels: conda-forge - - uses: actions/checkout@v2 - - run: git fetch --prune --unshallow --tags - - name: Install pytest - run: | - pip install pytest - pip install pytest-cov - - name: Install package - run: | - pip install -e . - pip install git+https://github.com/neurodatawithoutborders/pynwb@dev - pip install dandi - - name: Uninstall h5py - run: pip uninstall -y h5py - - name: Install ROS3 - run: conda install -c conda-forge h5py - - name: Run pytest with coverage - run: pytest -rsx From c2c2d5cdfd8f7d7c040159fd19f17030d481a3fc Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Wed, 9 Nov 2022 12:48:29 -0500 Subject: [PATCH 5/5] Apply suggestions from code review --- src/nwbinspector/nwbinspector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nwbinspector/nwbinspector.py b/src/nwbinspector/nwbinspector.py index 1737986ca..319031bd6 100644 --- a/src/nwbinspector/nwbinspector.py +++ b/src/nwbinspector/nwbinspector.py @@ -358,7 +358,7 @@ def inspect_all( The default is False, which is also recommended. use_cached_namespaces : bool If not skipping validation, this determines whether to use global namespaces or those cached within the file. - Only available when using PyNWB version 2.1.0 or greater. + Only available when using PyNWB version 2.2.0 or greater. Defaults to True. progress_bar : bool, optional Display a progress bar while scanning NWBFiles. @@ -540,7 +540,7 @@ def inspect_nwb( The default is False, which is also recommended. use_cached_namespaces : bool If not skipping validation, this determines whether to use global namespaces or those cached within the file. - Only available when using PyNWB version 2.1.0 or greater. + Only available when using PyNWB version 2.2.0 or greater. Defaults to True. max_retries : int, optional When using the ros3 driver to stream data from an s3 path, occasional curl issues can result.