From 41bb87d36eaefb2bbcb45f65c6784db94e2af39f Mon Sep 17 00:00:00 2001 From: goanpeca Date: Wed, 23 Sep 2020 18:41:08 -0500 Subject: [PATCH 1/7] Use fastjsonschema if installed and tests. --- nbformat/__init__.py | 4 +-- nbformat/tests/many_tracebacks.ipynb | 46 +++++++++++++++++++++++++ nbformat/tests/test_validator.py | 30 +++++++++++++++++ nbformat/validator.py | 50 +++++++++++++++++++++++----- 4 files changed, 120 insertions(+), 10 deletions(-) create mode 100644 nbformat/tests/many_tracebacks.ipynb diff --git a/nbformat/__init__.py b/nbformat/__init__.py index fa05fc27..24cf75e3 100644 --- a/nbformat/__init__.py +++ b/nbformat/__init__.py @@ -74,7 +74,7 @@ def reads(s, as_version, **kwargs): if as_version is not NO_CONVERT: nb = convert(nb, as_version) try: - validate(nb) + validate(nb, use_fast=True) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return nb @@ -104,7 +104,7 @@ def writes(nb, version=NO_CONVERT, **kwargs): else: version, _ = reader.get_version(nb) try: - validate(nb) + validate(nb, use_fast=True) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return versions[version].writes_json(nb, **kwargs) diff --git a/nbformat/tests/many_tracebacks.ipynb b/nbformat/tests/many_tracebacks.ipynb new file mode 100644 index 00000000..a37eda86 --- /dev/null +++ b/nbformat/tests/many_tracebacks.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'iAmNotDefined' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0miAmNotDefined\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'iAmNotDefined' is not defined" + ] + } + ], + "source": [ + "# Imagine this cell called a function which runs things on a cluster and you have an error" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py index 308d98a2..67750d3c 100644 --- a/nbformat/tests/test_validator.py +++ b/nbformat/tests/test_validator.py @@ -4,12 +4,18 @@ # Distributed under the terms of the Modified BSD License. import os +import time from .base import TestsBase from jsonschema import ValidationError from nbformat import read from ..validator import isvalid, validate, iter_validate +try: + import fastjsonschema +except ImportError: + fastjsonschema = None + class TestValidator(TestsBase): @@ -118,3 +124,27 @@ def test_validation_no_version(self): """Test that an invalid notebook with no version fails validation""" with self.assertRaises(ValidationError) as e: validate({'invalid': 'notebook'}) + + def test_fast_validation(self): + """ + Test that valid notebook with too many outputs is parsed ~12 times + faster with fastjsonschema. + """ + if fastjsonschema: + with self.fopen(u'many_tracebacks.ipynb', u'r') as f: + nb = read(f, as_version=4) + + # Multiply output + base_output = nb["cells"][0]["outputs"][0] + for i in range(50000): + nb["cells"][0]["outputs"].append(base_output) + + start_time = time.time() + validate(nb, use_fast=True) + fast_time = time.time() - start_time + + start_time = time.time() + validate(nb) + slow_time = time.time() - start_time + + self.assertGreater(slow_time / fast_time, 12) diff --git a/nbformat/validator.py b/nbformat/validator.py index 1be77072..0ea7ef84 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -21,11 +21,20 @@ """ raise ImportError(verbose_msg) from e +# Use fastjsonschema if installed +try: + import fastjsonschema + from fastjsonschema import JsonSchemaException +except ImportError: + fastjsonschema = None + JsonSchemaException = ValidationError + from ipython_genutils.importstring import import_item from .reader import get_version, reads validators = {} +fast_validators = {} def _relax_additional_properties(obj): """relax any `additionalProperties`""" @@ -50,7 +59,7 @@ def _allow_undefined(schema): ) return schema -def get_validator(version=None, version_minor=None, relax_add_props=False): +def get_validator(version=None, version_minor=None, relax_add_props=False, use_fast=False): """Load the JSON schema into a Validator""" if version is None: from . import current_nbformat @@ -77,6 +86,10 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): validators[version_tuple] = Validator(schema_json) + # If fastjsonschema is installed use it to validate + if use_fast and fastjsonschema is not None and version_tuple not in fast_validators: + fast_validators[version_tuple] = fastjsonschema.compile(schema_json) + if relax_add_props: try: schema_json = _get_schema_json(v, version=version, version_minor=version_minor) @@ -88,7 +101,15 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): schema_json = _relax_additional_properties(schema_json) validators[version_tuple] = Validator(schema_json) - return validators[version_tuple] + + # If fastjsonschema is installed use it to validate + if use_fast and fastjsonschema is not None: + fast_validators[version_tuple] = fastjsonschema.compile(schema_json) + + if use_fast and fastjsonschema is not None: + return fast_validators[version_tuple] + else: + return validators[version_tuple] def _get_schema_json(v, version=None, version_minor=None): @@ -241,7 +262,7 @@ def better_validation_error(error, version, version_minor): def validate(nbdict=None, ref=None, version=None, version_minor=None, - relax_add_props=False, nbjson=None): + relax_add_props=False, nbjson=None, use_fast=False): """Checks whether the given notebook dict-like object conforms to the relevant notebook format schema. @@ -270,10 +291,22 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, if version is None: version, version_minor = 1, 0 - for error in iter_validate(nbdict, ref=ref, version=version, - version_minor=version_minor, - relax_add_props=relax_add_props): - raise error + validator = get_validator(version, version_minor, relax_add_props=relax_add_props, + use_fast=True) + + if fastjsonschema is not None and use_fast: + if validator is None: + raise ValidationError("No schema for validating v%s notebooks" % version) + + try: + validator(nbdict) + except JsonSchemaException as e: + raise ValidationError(e.message, schema_path=e.path) + else: + for error in iter_validate(nbdict, ref=ref, version=version, + version_minor=version_minor, + relax_add_props=relax_add_props): + raise error def iter_validate(nbdict=None, ref=None, version=None, version_minor=None, @@ -294,7 +327,8 @@ def iter_validate(nbdict=None, ref=None, version=None, version_minor=None, if version is None: version, version_minor = get_version(nbdict) - validator = get_validator(version, version_minor, relax_add_props=relax_add_props) + validator = get_validator(version, version_minor, relax_add_props=relax_add_props, + use_fast=False) if validator is None: # no validator From c235609a4edf8b393c785de0d823be66ac5ac26b Mon Sep 17 00:00:00 2001 From: goanpeca Date: Thu, 24 Sep 2020 09:38:12 -0500 Subject: [PATCH 2/7] Refactor code into a common validator --- nbformat/json_compat.py | 47 ++++++++++++++++++++++++++++++++ nbformat/validator.py | 59 +++++++---------------------------------- 2 files changed, 56 insertions(+), 50 deletions(-) create mode 100644 nbformat/json_compat.py diff --git a/nbformat/json_compat.py b/nbformat/json_compat.py new file mode 100644 index 00000000..788b5723 --- /dev/null +++ b/nbformat/json_compat.py @@ -0,0 +1,47 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +""" +Common validator wrapper to provide a uniform usage of other schema validation +libraries. +""" + +from jsonschema import Draft4Validator as _JsonSchemaValidator +from jsonschema import ValidationError + +try: + import fastjsonschema + from fastjsonschema import JsonSchemaException as _JsonSchemaException +except ImportError: + fastjsonschema = None + _JsonSchemaException = ValidationError + + +class Validator: + """ + Common validator wrapper to provide a uniform usage of other schema validation + libraries. + """ + + def __init__(self, schema): + self._schema = schema + + # Validation libraries + self._jsonschema = _JsonSchemaValidator(schema) # Default + self._fastjsonschema_validate = fastjsonschema.compile(schema) if fastjsonschema else None + + def validate(self, data): + """ + Validate the schema of ``data``. + + Will use ``fastjsonschema`` if available. + """ + if fastjsonschema: + try: + self._fastjsonschema_validate(data) + except _JsonSchemaException as e: + raise ValidationError(e.message) + else: + self._jsonschema.validate(data) + + def iter_errors(self, data, schema=None): + return self._jsonschema.iter_errors(data, schema) diff --git a/nbformat/validator.py b/nbformat/validator.py index 0ea7ef84..ecf8b735 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -8,33 +8,12 @@ import sys import warnings -try: - from jsonschema import ValidationError - from jsonschema import Draft4Validator as Validator -except ImportError as e: - verbose_msg = """ - Jupyter notebook format depends on the jsonschema package: - - https://pypi.python.org/pypi/jsonschema - - Please install it first. - """ - raise ImportError(verbose_msg) from e - -# Use fastjsonschema if installed -try: - import fastjsonschema - from fastjsonschema import JsonSchemaException -except ImportError: - fastjsonschema = None - JsonSchemaException = ValidationError - from ipython_genutils.importstring import import_item +from .json_compat import Validator, ValidationError from .reader import get_version, reads validators = {} -fast_validators = {} def _relax_additional_properties(obj): """relax any `additionalProperties`""" @@ -59,7 +38,7 @@ def _allow_undefined(schema): ) return schema -def get_validator(version=None, version_minor=None, relax_add_props=False, use_fast=False): +def get_validator(version=None, version_minor=None, relax_add_props=False): """Load the JSON schema into a Validator""" if version is None: from . import current_nbformat @@ -86,10 +65,6 @@ def get_validator(version=None, version_minor=None, relax_add_props=False, use_f validators[version_tuple] = Validator(schema_json) - # If fastjsonschema is installed use it to validate - if use_fast and fastjsonschema is not None and version_tuple not in fast_validators: - fast_validators[version_tuple] = fastjsonschema.compile(schema_json) - if relax_add_props: try: schema_json = _get_schema_json(v, version=version, version_minor=version_minor) @@ -99,17 +74,8 @@ def get_validator(version=None, version_minor=None, relax_add_props=False, use_f # this allows properties to be added for intermediate # representations while validating for all other kinds of errors schema_json = _relax_additional_properties(schema_json) - validators[version_tuple] = Validator(schema_json) - - # If fastjsonschema is installed use it to validate - if use_fast and fastjsonschema is not None: - fast_validators[version_tuple] = fastjsonschema.compile(schema_json) - - if use_fast and fastjsonschema is not None: - return fast_validators[version_tuple] - else: - return validators[version_tuple] + return validators[version_tuple] def _get_schema_json(v, version=None, version_minor=None): @@ -291,17 +257,11 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, if version is None: version, version_minor = 1, 0 - validator = get_validator(version, version_minor, relax_add_props=relax_add_props, - use_fast=True) - - if fastjsonschema is not None and use_fast: - if validator is None: - raise ValidationError("No schema for validating v%s notebooks" % version) - - try: - validator(nbdict) - except JsonSchemaException as e: - raise ValidationError(e.message, schema_path=e.path) + validator = get_validator(version, version_minor, relax_add_props=relax_add_props) + if validator is None: + raise ValidationError("No schema for validating v%s notebooks" % version) + elif use_fast: + validator.validate(nbdict) else: for error in iter_validate(nbdict, ref=ref, version=version, version_minor=version_minor, @@ -327,8 +287,7 @@ def iter_validate(nbdict=None, ref=None, version=None, version_minor=None, if version is None: version, version_minor = get_version(nbdict) - validator = get_validator(version, version_minor, relax_add_props=relax_add_props, - use_fast=False) + validator = get_validator(version, version_minor, relax_add_props=relax_add_props) if validator is None: # no validator From 4c708b635153979dcf8b55243516dc8b39718ff9 Mon Sep 17 00:00:00 2001 From: goanpeca Date: Thu, 24 Sep 2020 15:07:54 -0500 Subject: [PATCH 3/7] Split classes and add reading the current validator from environment variable --- nbformat/__init__.py | 4 +- nbformat/json_compat.py | 71 +++++--- nbformat/tests/base.py | 9 +- nbformat/tests/test_validator.py | 297 ++++++++++++++++++------------- nbformat/validator.py | 21 ++- 5 files changed, 242 insertions(+), 160 deletions(-) diff --git a/nbformat/__init__.py b/nbformat/__init__.py index 24cf75e3..fa05fc27 100644 --- a/nbformat/__init__.py +++ b/nbformat/__init__.py @@ -74,7 +74,7 @@ def reads(s, as_version, **kwargs): if as_version is not NO_CONVERT: nb = convert(nb, as_version) try: - validate(nb, use_fast=True) + validate(nb) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return nb @@ -104,7 +104,7 @@ def writes(nb, version=NO_CONVERT, **kwargs): else: version, _ = reader.get_version(nb) try: - validate(nb, use_fast=True) + validate(nb) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return versions[version].writes_json(nb, **kwargs) diff --git a/nbformat/json_compat.py b/nbformat/json_compat.py index 788b5723..6f458c2b 100644 --- a/nbformat/json_compat.py +++ b/nbformat/json_compat.py @@ -5,6 +5,9 @@ libraries. """ +import os + +import jsonschema from jsonschema import Draft4Validator as _JsonSchemaValidator from jsonschema import ValidationError @@ -16,32 +19,56 @@ _JsonSchemaException = ValidationError -class Validator: - """ - Common validator wrapper to provide a uniform usage of other schema validation - libraries. - """ +class JsonSchemaValidator: + name = "jsonschema" def __init__(self, schema): self._schema = schema - - # Validation libraries - self._jsonschema = _JsonSchemaValidator(schema) # Default - self._fastjsonschema_validate = fastjsonschema.compile(schema) if fastjsonschema else None + self._default_validator = _JsonSchemaValidator(schema) # Default + self._validator = self._default_validator def validate(self, data): - """ - Validate the schema of ``data``. - - Will use ``fastjsonschema`` if available. - """ - if fastjsonschema: - try: - self._fastjsonschema_validate(data) - except _JsonSchemaException as e: - raise ValidationError(e.message) - else: - self._jsonschema.validate(data) + self._default_validator.validate(data) def iter_errors(self, data, schema=None): - return self._jsonschema.iter_errors(data, schema) + return self._default_validator.iter_errors(data, schema) + + +class FastJsonSchemaValidator(JsonSchemaValidator): + name = "fastjsonschema" + + def __init__(self, schema): + super().__init__(schema) + + self._validator = fastjsonschema.compile(schema) + + def validate(self, data): + try: + self._validator(data) + except _JsonSchemaException as error: + raise ValidationError(error.message, schema_path=error.path) + + +_VALIDATOR_MAP = [ + ("fastjsonschema", fastjsonschema, FastJsonSchemaValidator), + ("jsonschema", jsonschema, JsonSchemaValidator), +] +VALIDATORS = [item[0] for item in _VALIDATOR_MAP] + + +def _validator_for_name(validator_name): + if validator_name not in VALIDATORS: + raise ValueError("Invalid validator '{0}' value!\nValid values are: {1}".format( + validator_name, VALIDATORS)) + + for (name, module, validator_cls) in _VALIDATOR_MAP: + if module and validator_name == name: + return validator_cls + + +def get_current_validator(): + """ + Return the current validator based on the value of an environment variable. + """ + validator_name = os.environ.get("NBFORMAT_VALIDATOR", "jsonschema") + return _validator_for_name(validator_name) diff --git a/nbformat/tests/base.py b/nbformat/tests/base.py index 303d2f73..312c22bb 100644 --- a/nbformat/tests/base.py +++ b/nbformat/tests/base.py @@ -12,9 +12,10 @@ class TestsBase(unittest.TestCase): """Base tests class.""" - def fopen(self, f, mode=u'r',encoding='utf-8'): - return io.open(os.path.join(self._get_files_path(), f), mode, encoding=encoding) + @classmethod + def fopen(cls, f, mode=u'r',encoding='utf-8'): + return io.open(os.path.join(cls._get_files_path(), f), mode, encoding=encoding) - - def _get_files_path(self): + @classmethod + def _get_files_path(cls): return os.path.dirname(__file__) diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py index 67750d3c..7723dc91 100644 --- a/nbformat/tests/test_validator.py +++ b/nbformat/tests/test_validator.py @@ -4,147 +4,196 @@ # Distributed under the terms of the Modified BSD License. import os -import time +import re from .base import TestsBase from jsonschema import ValidationError from nbformat import read from ..validator import isvalid, validate, iter_validate +from ..json_compat import VALIDATORS + +import pytest + + +# Fixtures +@pytest.fixture(autouse=True) +def clean_env_before_and_after_tests(): + """Fixture to clean up env variables before and after tests.""" + os.environ.pop("NBFORMAT_VALIDATOR", None) + yield + os.environ.pop("NBFORMAT_VALIDATOR", None) + + +# Helpers +def set_validator(validator_name): + os.environ["NBFORMAT_VALIDATOR"] = validator_name + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb2(validator_name): + """Test that a v2 notebook converted to current passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test2.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb3(validator_name): + """Test that a v3 notebook passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test3.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4(validator_name): + """Test that a v4 notebook passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4_document_info(validator_name): + """Test that a notebook with document_info passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4docinfo.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4custom(validator_name): + """Test that a notebook with a custom JSON mimetype passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4custom.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4jupyter_metadata(validator_name): + """Test that a notebook with a jupyter metadata passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4jupyter_metadata.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4jupyter_metadata_timings(validator_name): + """Tests that a notebook with "timing" in metadata passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4jupyter_metadata_timings.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_invalid(validator_name): + """Test than an invalid notebook does not pass validation""" + set_validator(validator_name) + # this notebook has a few different errors: + # - one cell is missing its source + # - invalid cell type + # - invalid output_type + with TestsBase.fopen(u'invalid.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError): + validate(nb) + assert not isvalid(nb) -try: - import fastjsonschema -except ImportError: - fastjsonschema = None +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_validate_empty(validator_name): + """Test that an empty notebook (invalid) fails validation""" + set_validator(validator_name) + with pytest.raises(ValidationError) as e: + validate({}) -class TestValidator(TestsBase): - def test_nb2(self): - """Test that a v2 notebook converted to current passes validation""" - with self.fopen(u'test2.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_future(validator_name): + """Test that a notebook from the future with extra keys passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4plus.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError): + validate(nb, version=4, version_minor=3) - def test_nb3(self): - """Test that a v3 notebook passes validation""" - with self.fopen(u'test3.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) + assert not isvalid(nb, version=4, version_minor=3) + assert isvalid(nb) - def test_nb4(self): - """Test that a v4 notebook passes validation""" - with self.fopen(u'test4.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) - def test_nb4_document_info(self): - """Test that a notebook with document_info passes validation""" - with self.fopen(u'test4docinfo.ipynb', u'r') as f: - nb = read(f, as_version=4) +# This is only a valid test for the default validator, jsonschema +@pytest.mark.parametrize("validator_name", ["jsonschema"]) +def test_validation_error(validator_name): + set_validator(validator_name) + with TestsBase.fopen(u'invalid.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError) as exception_info: validate(nb) - self.assertEqual(isvalid(nb), True) - def test_nb4custom(self): - """Test that a notebook with a custom JSON mimetype passes validation""" - with self.fopen(u'test4custom.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) + s = str(exception_info.value) + assert re.compile(r"validating .required. in markdown_cell").search(s) + assert re.compile(r"source.* is a required property").search(s) + assert re.compile(r"On instance\[u?['\"].*cells['\"]\]\[0\]").search(s) + assert len(s.splitlines()) < 10 - def test_nb4jupyter_metadata(self): - """Test that a notebook with a jupyter metadata passes validation""" - with self.fopen(u'test4jupyter_metadata.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) - def test_nb4jupyter_metadata_timings(self): - """Tests that a notebook with "timing" in metadata passes validation""" - with self.fopen(u'test4jupyter_metadata_timings.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertTrue(isvalid(nb)) - - def test_invalid(self): - """Test than an invalid notebook does not pass validation""" - # this notebook has a few different errors: - # - one cell is missing its source - # - invalid cell type - # - invalid output_type - with self.fopen(u'invalid.ipynb', u'r') as f: - nb = read(f, as_version=4) - with self.assertRaises(ValidationError): - validate(nb) - self.assertEqual(isvalid(nb), False) - - def test_validate_empty(self): - """Test that an empty notebook (invalid) fails validation""" - with self.assertRaises(ValidationError) as e: - validate({}) - - def test_future(self): - """Test that a notebook from the future with extra keys passes validation""" - with self.fopen(u'test4plus.ipynb', u'r') as f: - nb = read(f, as_version=4) - with self.assertRaises(ValidationError): - validate(nb, version=4, version_minor=3) +# This is only a valid test for the default validator, jsonschema +@pytest.mark.parametrize("validator_name", ["jsonschema"]) +def test_iter_validation_error(validator_name): + set_validator(validator_name) + with TestsBase.fopen(u'invalid.ipynb', u'r') as f: + nb = read(f, as_version=4) - self.assertEqual(isvalid(nb, version=4, version_minor=3), False) - self.assertEqual(isvalid(nb), True) + errors = list(iter_validate(nb)) + assert len(errors) == 3 + assert {e.ref for e in errors} == {'markdown_cell', 'heading_cell', 'bad stream'} - def test_validation_error(self): - with self.fopen(u'invalid.ipynb', u'r') as f: - nb = read(f, as_version=4) - with self.assertRaises(ValidationError) as e: - validate(nb) - s = str(e.exception) - self.assertRegex(s, "validating.*required.* in markdown_cell") - self.assertRegex(s, "source.* is a required property") - self.assertRegex(s, r"On instance\[u?['\"].*cells['\"]\]\[0\]") - self.assertLess(len(s.splitlines()), 10) - - def test_iter_validation_error(self): - with self.fopen(u'invalid.ipynb', u'r') as f: + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_iter_validation_empty(validator_name): + """Test that an empty notebook (invalid) fails validation via iter_validate""" + set_validator(validator_name) + errors = list(iter_validate({})) + assert len(errors) == 1 + assert type(errors[0]) == ValidationError + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_validation_no_version(validator_name): + """Test that an invalid notebook with no version fails validation""" + set_validator(validator_name) + with pytest.raises(ValidationError) as e: + validate({'invalid': 'notebook'}) + + +def test_invalid_validator_raises_value_error(): + """Test that an invalid notebook with no version fails validation""" + set_validator("foobar") + with pytest.raises(ValueError): + with TestsBase.fopen(u'test2.ipynb', u'r') as f: nb = read(f, as_version=4) - errors = list(iter_validate(nb)) - assert len(errors) == 3 - assert {e.ref for e in errors} == {'markdown_cell', 'heading_cell', 'bad stream'} - - def test_iter_validation_empty(self): - """Test that an empty notebook (invalid) fails validation via iter_validate""" - errors = list(iter_validate({})) - assert len(errors) == 1 - assert type(errors[0]) == ValidationError - - def test_validation_no_version(self): - """Test that an invalid notebook with no version fails validation""" - with self.assertRaises(ValidationError) as e: - validate({'invalid': 'notebook'}) - - def test_fast_validation(self): - """ - Test that valid notebook with too many outputs is parsed ~12 times - faster with fastjsonschema. - """ - if fastjsonschema: - with self.fopen(u'many_tracebacks.ipynb', u'r') as f: - nb = read(f, as_version=4) - - # Multiply output - base_output = nb["cells"][0]["outputs"][0] - for i in range(50000): - nb["cells"][0]["outputs"].append(base_output) - - start_time = time.time() - validate(nb, use_fast=True) - fast_time = time.time() - start_time - - start_time = time.time() - validate(nb) - slow_time = time.time() - start_time - - self.assertGreater(slow_time / fast_time, 12) + +def test_invalid_validator_raises_value_error_after_read(): + """Test that an invalid notebook with no version fails validation""" + set_validator("jsonschema") + with TestsBase.fopen(u'test2.ipynb', u'r') as f: + nb = read(f, as_version=4) + + set_validator("foobar") + with pytest.raises(ValueError): + validate(nb) diff --git a/nbformat/validator.py b/nbformat/validator.py index ecf8b735..baf37202 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -9,10 +9,9 @@ import warnings from ipython_genutils.importstring import import_item -from .json_compat import Validator, ValidationError +from .json_compat import get_current_validator, ValidationError from .reader import get_version, reads - validators = {} def _relax_additional_properties(obj): @@ -49,7 +48,8 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): if version_minor is None: version_minor = current_minor - version_tuple = (version, version_minor) + current_validator = get_current_validator() + version_tuple = (current_validator.name, version, version_minor) if version_tuple not in validators: try: @@ -63,7 +63,7 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): # and allow undefined cell types and outputs schema_json = _allow_undefined(schema_json) - validators[version_tuple] = Validator(schema_json) + validators[version_tuple] = current_validator(schema_json) if relax_add_props: try: @@ -74,7 +74,8 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): # this allows properties to be added for intermediate # representations while validating for all other kinds of errors schema_json = _relax_additional_properties(schema_json) - validators[version_tuple] = Validator(schema_json) + validators[version_tuple] = current_validator(schema_json) + return validators[version_tuple] @@ -260,12 +261,16 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, validator = get_validator(version, version_minor, relax_add_props=relax_add_props) if validator is None: raise ValidationError("No schema for validating v%s notebooks" % version) - elif use_fast: + elif validator.name != "jsonschema": + # If not using default validator then, skip iter_validate, and provide + # less legible errors validator.validate(nbdict) else: + # If using default validator then use iter_validate, and provide + # more readable errors for error in iter_validate(nbdict, ref=ref, version=version, - version_minor=version_minor, - relax_add_props=relax_add_props): + version_minor=version_minor, + relax_add_props=relax_add_props): raise error From 49da28f59e08b0e529b6c8823e840ada61cdf6b6 Mon Sep 17 00:00:00 2001 From: goanpeca Date: Thu, 24 Sep 2020 15:56:06 -0500 Subject: [PATCH 4/7] Add review comments. --- nbformat/json_compat.py | 14 +++++++++++--- nbformat/validator.py | 21 +++++---------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/nbformat/json_compat.py b/nbformat/json_compat.py index 6f458c2b..936d3c87 100644 --- a/nbformat/json_compat.py +++ b/nbformat/json_compat.py @@ -38,8 +38,6 @@ class FastJsonSchemaValidator(JsonSchemaValidator): name = "fastjsonschema" def __init__(self, schema): - super().__init__(schema) - self._validator = fastjsonschema.compile(schema) def validate(self, data): @@ -48,6 +46,16 @@ def validate(self, data): except _JsonSchemaException as error: raise ValidationError(error.message, schema_path=error.path) + def iter_errors(self, data, schema=None): + errors = [] + validate_func = self._validator if schema is None else fastjsonschema.compile(schema) + try: + validate_func(data) + except _JsonSchemaException as error: + errors = [ValidationError(error.message, schema_path=error.path)] + + return errors + _VALIDATOR_MAP = [ ("fastjsonschema", fastjsonschema, FastJsonSchemaValidator), @@ -68,7 +76,7 @@ def _validator_for_name(validator_name): def get_current_validator(): """ - Return the current validator based on the value of an environment variable. + Return the default validator based on the value of an environment variable. """ validator_name = os.environ.get("NBFORMAT_VALIDATOR", "jsonschema") return _validator_for_name(validator_name) diff --git a/nbformat/validator.py b/nbformat/validator.py index baf37202..bd44729c 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -229,7 +229,7 @@ def better_validation_error(error, version, version_minor): def validate(nbdict=None, ref=None, version=None, version_minor=None, - relax_add_props=False, nbjson=None, use_fast=False): + relax_add_props=False, nbjson=None): """Checks whether the given notebook dict-like object conforms to the relevant notebook format schema. @@ -245,7 +245,6 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, else: raise TypeError("validate() missing 1 required argument: 'nbdict'") - if ref is None: # if ref is not specified, we have a whole notebook, so we can get the version nbdict_version, nbdict_version_minor = get_version(nbdict) @@ -258,20 +257,10 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, if version is None: version, version_minor = 1, 0 - validator = get_validator(version, version_minor, relax_add_props=relax_add_props) - if validator is None: - raise ValidationError("No schema for validating v%s notebooks" % version) - elif validator.name != "jsonschema": - # If not using default validator then, skip iter_validate, and provide - # less legible errors - validator.validate(nbdict) - else: - # If using default validator then use iter_validate, and provide - # more readable errors - for error in iter_validate(nbdict, ref=ref, version=version, - version_minor=version_minor, - relax_add_props=relax_add_props): - raise error + for error in iter_validate(nbdict, ref=ref, version=version, + version_minor=version_minor, + relax_add_props=relax_add_props): + raise error def iter_validate(nbdict=None, ref=None, version=None, version_minor=None, From eb74cf7d23b17a66d978ccf355cf769ed76c1ac0 Mon Sep 17 00:00:00 2001 From: goanpeca Date: Sun, 4 Oct 2020 15:11:33 -0500 Subject: [PATCH 5/7] Update docs, readme and changelog and CI for tests --- .github/workflows/tests.yml | 1 + README.md | 13 +++++++++++-- docs/api.rst | 12 ++++++++++++ docs/changelog.rst | 4 ++++ setup.py | 1 + 5 files changed, 29 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f8377d92..c887798c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,6 +23,7 @@ jobs: run: | pip install --upgrade pip setuptools pip install nbformat[test] + pip install nbformat[fast] pip install codecov - name: Install nbformat run: | diff --git a/README.md b/README.md index 0322a5e3..f1ddadd7 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,7 @@ [![codecov.io](https://codecov.io/github/jupyter/nbformat/coverage.svg?branch=master)](https://codecov.io/github/jupyter/nbformat?branch=master) [![Code Health](https://landscape.io/github/jupyter/nbformat/master/landscape.svg?style=flat)](https://landscape.io/github/jupyter/nbformat/master) - - +![CI Tests](https://github.com/jupyter/nbformat/workflows/Run%20tests/badge.svg) `nbformat` contains the reference implementation of the [Jupyter Notebook format][], and Python APIs for working with notebooks. @@ -20,6 +19,16 @@ From the command line: pip install nbformat ``` +## Using a different json schema validator + +You can install and use [fastjsonschema](https://horejsek.github.io/python-fastjsonschema/) by running: + +``` {.sourceCode .bash} +pip install nbformat[fast] +``` + +To enable fast validation with `fastjsonschema`, set the environment variable `NBFORMAT_VALIDATOR` to the value `fastjsonschema`. + ## Python Version Support This library supported python 2.7 and python 3.5+ for `4.x.x` releases. With python 2's end-of-life nbformat `5.x.x` is now python 3.5+ only. Support for 3.5 will be dropped when it's officially sunset by the python organization. diff --git a/docs/api.rst b/docs/api.rst index 2e5493ec..dddde60b 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -120,3 +120,15 @@ bindings are available, and an in-memory store otherwise. .. autoclass:: SQLiteSignatureStore .. autoclass:: MemorySignatureStore + +Optional fast validation +------------------------ + +You can install and use `fastjsonschema `_ by running:: + + pip install nbformat[fast] + + +To enable fast validation with `fastjsonschema`, set the environment variable:: + + NBFORMAT_VALIDATOR="fastjsonschema" diff --git a/docs/changelog.rst b/docs/changelog.rst index b6b3b89e..8367576f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,10 @@ Changes in nbformat In Development ============== +- Add optional support for using `fastjsonschema` as the JSON validation library. + To enable fast validation, install `fastjsonschema` and set the environment + variable `NBFORMAT_VALIDATOR` to the value `fastjsonschema`. + 5.0.7 ===== diff --git a/setup.py b/setup.py index ea4c93af..b9876908 100644 --- a/setup.py +++ b/setup.py @@ -90,6 +90,7 @@ ] extras_require = setuptools_args['extras_require'] = { + 'fast': ['fastjsonschema'], 'test': ['testpath', 'pytest', 'pytest-cov'], } From 99603b11a38f2f93e2e5e0c547df1acef53635fd Mon Sep 17 00:00:00 2001 From: goanpeca Date: Sun, 4 Oct 2020 15:15:40 -0500 Subject: [PATCH 6/7] Fix extras --- .github/workflows/tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c887798c..92c5e56c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -22,8 +22,8 @@ jobs: - name: Install test dependencies run: | pip install --upgrade pip setuptools - pip install nbformat[test] - pip install nbformat[fast] + pip install .[test] + pip install .[fast] pip install codecov - name: Install nbformat run: | From e977d2252614e359c87b7ad5178d9997ed4373d5 Mon Sep 17 00:00:00 2001 From: goanpeca Date: Tue, 6 Oct 2020 13:16:30 -0500 Subject: [PATCH 7/7] Add fastjsonshcema to test deps and update CI --- .github/workflows/tests.yml | 1 - setup.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 92c5e56c..0105c34f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,7 +23,6 @@ jobs: run: | pip install --upgrade pip setuptools pip install .[test] - pip install .[fast] pip install codecov - name: Install nbformat run: | diff --git a/setup.py b/setup.py index b9876908..5fb46c8d 100644 --- a/setup.py +++ b/setup.py @@ -91,7 +91,7 @@ extras_require = setuptools_args['extras_require'] = { 'fast': ['fastjsonschema'], - 'test': ['testpath', 'pytest', 'pytest-cov'], + 'test': ['fastjsonschema', 'testpath', 'pytest', 'pytest-cov'], } if 'setuptools' in sys.modules: