diff --git a/nbstripout/__init__.py b/nbstripout/__init__.py index a1009c6..5406ebb 100644 --- a/nbstripout/__init__.py +++ b/nbstripout/__init__.py @@ -1,5 +1,5 @@ from ._nbstripout import install, uninstall, status, main, __doc__ as docstring from ._utils import pop_recursive, strip_output, MetadataError -__all__ = ["install", "uninstall", "status", "main", - "pop_recursive", "strip_output", "MetadataError"] + +__all__ = ['install', 'uninstall', 'status', 'main', 'pop_recursive', 'strip_output', 'MetadataError'] __doc__ = docstring diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 79e61ee..7c329c2 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -125,7 +125,7 @@ from nbstripout._utils import strip_output, strip_zeppelin_output -__all__ = ["install", "uninstall", "status", "main"] +__all__ = ['install', 'uninstall', 'status', 'main'] __version__ = '0.8.1' @@ -136,13 +136,17 @@ def _get_system_gitconfig_folder(): try: - git_config_output = check_output(['git', 'config', '--system', '--list', '--show-origin'], universal_newlines=True, stderr=STDOUT).strip() + git_config_output = check_output( + ['git', 'config', '--system', '--list', '--show-origin'], universal_newlines=True, stderr=STDOUT + ).strip() # If the output is empty, it means the file exists but is empty, so we cannot get the path. # To still get it, we're setting a temporary config parameter. if git_config_output == '': check_call(['git', 'config', '--system', 'filter.nbstripoutput.test', 'test']) - git_config_output = check_output(['git', 'config', '--system', '--list', '--show-origin'], universal_newlines=True).strip() + git_config_output = check_output( + ['git', 'config', '--system', '--list', '--show-origin'], universal_newlines=True + ).strip() check_call(['git', 'config', '--system', '--unset', 'filter.nbstripoutput.test']) output_lines = git_config_output.split('\n') @@ -192,7 +196,7 @@ def _parse_size(num_str): elif num_str[-1] == 'G': return int(num_str[:-1]) * (10**9) else: - raise ValueError(f"Unknown size identifier {num_str[-1]}") + raise ValueError(f'Unknown size identifier {num_str[-1]}') def install(git_config, install_location=INSTALL_LOCATION_LOCAL, python=None, attrfile=None): @@ -280,7 +284,9 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False): elif install_location == INSTALL_LOCATION_GLOBAL: location = 'globally' else: - git_dir = path.dirname(path.abspath(check_output(['git', 'rev-parse', '--git-dir'], universal_newlines=True).strip())) + git_dir = path.dirname( + path.abspath(check_output(['git', 'rev-parse', '--git-dir'], universal_newlines=True).strip()) + ) location = f"in repository '{git_dir}'" clean = check_output(git_config + ['filter.nbstripout.clean'], universal_newlines=True).strip() @@ -299,7 +305,9 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False): diff_attributes = ''.join(line for line in attrs if 'diff' in line).strip() else: attributes = check_output(['git', 'check-attr', 'filter', '--', '*.ipynb'], universal_newlines=True).strip() - diff_attributes = check_output(['git', 'check-attr', 'diff', '--', '*.ipynb'], universal_newlines=True).strip() + diff_attributes = check_output( + ['git', 'check-attr', 'diff', '--', '*.ipynb'], universal_newlines=True + ).strip() try: extra_keys = check_output(git_config + ['filter.nbstripout.extrakeys'], universal_newlines=True).strip() @@ -333,16 +341,24 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False): return 1 + def process_jupyter_notebook(input_stream, output_stream, args, extra_keys, filename='input from stdin'): with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=UserWarning) + warnings.simplefilter('ignore', category=UserWarning) nb = nbformat.read(input_stream, as_version=nbformat.NO_CONVERT) nb_orig = copy.deepcopy(nb) - nb_stripped = strip_output(nb, args.keep_output, args.keep_count, - args.keep_id, extra_keys, args.drop_empty_cells, - args.drop_tagged_cells.split(), - args.strip_init_cells, _parse_size(args.max_size)) + nb_stripped = strip_output( + nb, + args.keep_output, + args.keep_count, + args.keep_id, + extra_keys, + args.drop_empty_cells, + args.drop_tagged_cells.split(), + args.strip_init_cells, + _parse_size(args.max_size), + ) any_change = nb_orig != nb_stripped @@ -355,11 +371,12 @@ def process_jupyter_notebook(input_stream, output_stream, args, extra_keys, file output_stream.seek(0) output_stream.truncate() with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=UserWarning) + warnings.simplefilter('ignore', category=UserWarning) nbformat.write(nb_stripped, output_stream) output_stream.flush() return any_change + def process_zeppelin_notebook(input_stream, output_stream, args, extra_keys, filename='input from stdin'): nb = json.load(input_stream, object_pairs_hook=collections.OrderedDict) nb_orig = copy.deepcopy(nb) @@ -380,66 +397,94 @@ def process_zeppelin_notebook(input_stream, output_stream, args, extra_keys, fil output_stream.flush() return any_change + def main(): parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter) task = parser.add_mutually_exclusive_group() - task.add_argument('--dry-run', action='store_true', - help='Print which notebooks would have been stripped') - task.add_argument('--install', action='store_true', - help='Install nbstripout in the current repository (set ' - 'up the git filter and attributes)') - task.add_argument('--uninstall', action='store_true', - help='Uninstall nbstripout from the current repository ' - '(remove the git filter and attributes)') - task.add_argument('--is-installed', action='store_true', - help='Check if nbstripout is installed in current repository') - task.add_argument('--status', action='store_true', - help='Print status of nbstripout installation in current ' - 'repository and configuration summary if installed') - task.add_argument('--version', action='store_true', - help='Print version') - parser.add_argument("--verify", action="store_true", - help="Return a non-zero exit code if any files were changed, Implies --dry-run") - parser.add_argument('--keep-count', action='store_true', - help='Do not strip the execution count/prompt number') - parser.add_argument('--keep-output', action='store_true', - help='Do not strip output', default=None) - parser.add_argument('--keep-id', action='store_true', - help='Keep the randomly generated cell ids, ' - 'which will be different after each execution.') - parser.add_argument('--extra-keys', default='', - help='Space separated list of extra keys to strip ' - 'from metadata, e.g. metadata.foo cell.metadata.bar') - parser.add_argument('--keep-metadata-keys', default='', - help='Space separated list of metadata keys to keep' - ', e.g. metadata.foo cell.metadata.bar') - parser.add_argument('--drop-empty-cells', action='store_true', - help='Remove cells where `source` is empty or contains only whitepace') - parser.add_argument('--drop-tagged-cells', default='', - help='Space separated list of cell-tags that remove an entire cell') - parser.add_argument('--strip-init-cells', action='store_true', - help='Remove cells with `init_cell: true` metadata (default: False)') - parser.add_argument('--attributes', metavar='FILEPATH', - help='Attributes file to add the filter to (in ' - 'combination with --install/--uninstall), ' - 'defaults to .git/info/attributes') + task.add_argument('--dry-run', action='store_true', help='Print which notebooks would have been stripped') + task.add_argument( + '--install', + action='store_true', + help='Install nbstripout in the current repository (set up the git filter and attributes)', + ) + task.add_argument( + '--uninstall', + action='store_true', + help='Uninstall nbstripout from the current repository (remove the git filter and attributes)', + ) + task.add_argument( + '--is-installed', action='store_true', help='Check if nbstripout is installed in current repository' + ) + task.add_argument( + '--status', + action='store_true', + help='Print status of nbstripout installation in current repository and configuration summary if installed', + ) + task.add_argument('--version', action='store_true', help='Print version') + parser.add_argument( + '--verify', action='store_true', help='Return a non-zero exit code if any files were changed, Implies --dry-run' + ) + parser.add_argument('--keep-count', action='store_true', help='Do not strip the execution count/prompt number') + parser.add_argument('--keep-output', action='store_true', help='Do not strip output', default=None) + parser.add_argument( + '--keep-id', + action='store_true', + help='Keep the randomly generated cell ids, which will be different after each execution.', + ) + parser.add_argument( + '--extra-keys', + default='', + help='Space separated list of extra keys to strip from metadata, e.g. metadata.foo cell.metadata.bar', + ) + parser.add_argument( + '--keep-metadata-keys', + default='', + help='Space separated list of metadata keys to keep, e.g. metadata.foo cell.metadata.bar', + ) + parser.add_argument( + '--drop-empty-cells', + action='store_true', + help='Remove cells where `source` is empty or contains only whitepace', + ) + parser.add_argument( + '--drop-tagged-cells', default='', help='Space separated list of cell-tags that remove an entire cell' + ) + parser.add_argument( + '--strip-init-cells', action='store_true', help='Remove cells with `init_cell: true` metadata (default: False)' + ) + parser.add_argument( + '--attributes', + metavar='FILEPATH', + help='Attributes file to add the filter to (in ' + 'combination with --install/--uninstall), ' + 'defaults to .git/info/attributes', + ) location = parser.add_mutually_exclusive_group() - location.add_argument('--global', dest='_global', action='store_true', - help='Use global git config (default is local config)') - location.add_argument('--system', dest='_system', action='store_true', - help='Use system git config (default is local config)') - location.add_argument('--python', dest='_python', metavar="PATH", - help='Path to python executable to use when --install\'ing ' - '(default is deduced from `sys.executable`)') - parser.add_argument('--force', '-f', action='store_true', - help='Strip output also from files with non ipynb extension') - parser.add_argument('--max-size', metavar='SIZE', - help='Keep outputs smaller than SIZE', default='0') - parser.add_argument('--mode', '-m', default='jupyter', choices=['jupyter', 'zeppelin'], - help='Specify mode between [jupyter (default) | zeppelin] (to be used in combination with -f)') - - parser.add_argument('--textconv', '-t', action='store_true', - help='Prints stripped files to STDOUT') + location.add_argument( + '--global', dest='_global', action='store_true', help='Use global git config (default is local config)' + ) + location.add_argument( + '--system', dest='_system', action='store_true', help='Use system git config (default is local config)' + ) + location.add_argument( + '--python', + dest='_python', + metavar='PATH', + help="Path to python executable to use when --install'ing (default is deduced from `sys.executable`)", + ) + parser.add_argument( + '--force', '-f', action='store_true', help='Strip output also from files with non ipynb extension' + ) + parser.add_argument('--max-size', metavar='SIZE', help='Keep outputs smaller than SIZE', default='0') + parser.add_argument( + '--mode', + '-m', + default='jupyter', + choices=['jupyter', 'zeppelin'], + help='Specify mode between [jupyter (default) | zeppelin] (to be used in combination with -f)', + ) + + parser.add_argument('--textconv', '-t', action='store_true', help='Prints stripped files to STDOUT') parser.add_argument('files', nargs='*', help='Files to strip output from') args = parser.parse_args() @@ -482,17 +527,29 @@ def main(): ] try: - extra_keys.extend(check_output((git_config if args._system or args._global else ['git', 'config']) + ['filter.nbstripout.extrakeys'], universal_newlines=True).strip().split()) + extra_keys.extend( + check_output( + (git_config if args._system or args._global else ['git', 'config']) + ['filter.nbstripout.extrakeys'], + universal_newlines=True, + ) + .strip() + .split() + ) except (CalledProcessError, FileNotFoundError): pass extra_keys.extend(args.extra_keys.split()) try: - keep_metadata_keys = check_output( - (git_config if args._system or args._global else ['git', 'config']) + ['filter.nbstripout.keepmetadatakeys'], - universal_newlines=True - ).strip().split() + keep_metadata_keys = ( + check_output( + (git_config if args._system or args._global else ['git', 'config']) + + ['filter.nbstripout.keepmetadatakeys'], + universal_newlines=True, + ) + .strip() + .split() + ) except (CalledProcessError, FileNotFoundError): keep_metadata_keys = [] keep_metadata_keys.extend(args.keep_metadata_keys.split()) @@ -533,6 +590,6 @@ def main(): except nbformat.reader.NotJSONError: print('No valid notebook detected on stdin', file=sys.stderr) raise SystemExit(1) - + if args.verify and any_change: raise SystemExit(1) diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py index 026cd03..3b6e14e 100644 --- a/nbstripout/_utils.py +++ b/nbstripout/_utils.py @@ -1,7 +1,7 @@ from collections import defaultdict import sys -__all__ = ["pop_recursive", "strip_output", "strip_zeppelin_output", "MetadataError"] +__all__ = ['pop_recursive', 'strip_output', 'strip_zeppelin_output', 'MetadataError'] class MetadataError(Exception): @@ -45,7 +45,7 @@ def _cells(nb, conditionals): def get_size(item): - """ Recursively sums length of all strings in `item` """ + """Recursively sums length of all strings in `item`""" if isinstance(item, str): return len(item) elif isinstance(item, list): @@ -73,9 +73,7 @@ def determine_keep_output(cell, default, strip_init_cells=False): # keep_output between metadata and tags should not contradict each other if has_keep_output_metadata and has_keep_output_tag and not keep_output_metadata: - raise MetadataError( - 'cell metadata contradicts tags: `keep_output` is false, but `keep_output` in tags' - ) + raise MetadataError('cell metadata contradicts tags: `keep_output` is false, but `keep_output` in tags') if has_keep_output_metadata or has_keep_output_tag: return keep_output_metadata or has_keep_output_tag @@ -94,8 +92,17 @@ def strip_zeppelin_output(nb): return nb -def strip_output(nb, keep_output, keep_count, keep_id, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[], - strip_init_cells=False, max_size=0): +def strip_output( + nb, + keep_output, + keep_count, + keep_id, + extra_keys=[], + drop_empty_cells=False, + drop_tagged_cells=[], + strip_init_cells=False, + max_size=0, +): """ Strip the outputs, execution count/prompt number and miscellaneous metadata from a notebook object, unless specified to keep either the outputs @@ -122,18 +129,16 @@ def strip_output(nb, keep_output, keep_count, keep_id, extra_keys=[], drop_empty if drop_empty_cells: conditionals.append(lambda c: any(line.strip() for line in c.get('source', []))) for tag_to_drop in drop_tagged_cells: - conditionals.append(lambda c: tag_to_drop not in c.get("metadata", {}).get("tags", [])) + conditionals.append(lambda c: tag_to_drop not in c.get('metadata', {}).get('tags', [])) for i, cell in enumerate(_cells(nb, conditionals)): keep_output_this_cell = determine_keep_output(cell, keep_output, strip_init_cells) # Remove the outputs, unless directed otherwise if 'outputs' in cell: - # Default behavior (max_size == 0) strips all outputs. if not keep_output_this_cell: - cell['outputs'] = [output for output in cell['outputs'] - if get_size(output) <= max_size] + cell['outputs'] = [output for output in cell['outputs'] if get_size(output) <= max_size] # Strip the counts from the outputs that were kept if not keep_count. if not keep_count: diff --git a/setup.py b/setup.py index 8e13364..a1eff32 100644 --- a/setup.py +++ b/setup.py @@ -3,44 +3,37 @@ with open('README.md') as f: long_description = f.read() -install_requires = [ - 'nbformat' -] - -setup(name='nbstripout', - version='0.8.1', - - author='Florian Rathgeber', - author_email='florian.rathgeber@gmail.com', - url='https://github.com/kynan/nbstripout', - - license="License :: OSI Approved :: MIT License", - - description='Strips outputs from Jupyter and IPython notebooks', - long_description=long_description, - long_description_content_type='text/markdown', - packages=find_packages(), - provides=['nbstripout'], - entry_points={ - 'console_scripts': [ - 'nbstripout = nbstripout._nbstripout:main' - ], - }, - - install_requires=install_requires, - python_requires='>=3.8', - - classifiers=[ - "Development Status :: 4 - Beta", - "Environment :: Other Environment", - "Framework :: IPython", - "Intended Audience :: Developers", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: Software Development :: Version Control", - ]) +install_requires = ['nbformat'] + +setup( + name='nbstripout', + version='0.8.1', + author='Florian Rathgeber', + author_email='florian.rathgeber@gmail.com', + url='https://github.com/kynan/nbstripout', + license='License :: OSI Approved :: MIT License', + description='Strips outputs from Jupyter and IPython notebooks', + long_description=long_description, + long_description_content_type='text/markdown', + packages=find_packages(), + provides=['nbstripout'], + entry_points={ + 'console_scripts': ['nbstripout = nbstripout._nbstripout:main'], + }, + install_requires=install_requires, + python_requires='>=3.8', + classifiers=[ + 'Development Status :: 4 - Beta', + 'Environment :: Other Environment', + 'Framework :: IPython', + 'Intended Audience :: Developers', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Topic :: Software Development :: Version Control', + ], +) diff --git a/tests/conftest.py b/tests/conftest.py index 694d7d5..bc711e5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1 +1 @@ -pytest_plugins = "pytester" +pytest_plugins = 'pytester' diff --git a/tests/test_diff_from_bash_subshells.py b/tests/test_diff_from_bash_subshells.py index 15658de..83cec23 100644 --- a/tests/test_diff_from_bash_subshells.py +++ b/tests/test_diff_from_bash_subshells.py @@ -4,12 +4,12 @@ import pytest # fix this before pytester.chdir() happens -NOTEBOOKS_FOLDER = Path("tests").absolute() +NOTEBOOKS_FOLDER = Path('tests').absolute() def test_diff_with_process_substitution_nodiff(pytester: pytest.Pytester): - if sys.platform.startswith("win"): - pytest.skip("test requires proper bash shell") + if sys.platform.startswith('win'): + pytest.skip('test requires proper bash shell') r = pytester.run( 'bash', @@ -21,17 +21,19 @@ def test_diff_with_process_substitution_nodiff(pytester: pytest.Pytester): def test_diff_with_process_substitution_diff(pytester: pytest.Pytester): - if sys.platform.startswith("win"): - pytest.skip("test requires proper bash shell") + if sys.platform.startswith('win'): + pytest.skip('test requires proper bash shell') r = pytester.run( 'bash', '-c', f'diff <( nbstripout -t {NOTEBOOKS_FOLDER / "test_diff.ipynb"} ) <( nbstripout -t {NOTEBOOKS_FOLDER / "test_diff_different.ipynb"} )', ) - r.stdout.re_match_lines(r"""(.*) + r.stdout.re_match_lines( + r"""(.*) < "print(\"aou\")" --- (.*\"print\(\\\"aou now it is different\\\"\)\") -""".splitlines()) +""".splitlines() + ) assert r.ret == 1 diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index 6638826..555b843 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -2,70 +2,98 @@ from pathlib import Path import re from subprocess import run, PIPE + # Note: typing.Pattern is deprecated, for removal in 3.13 in favour of re.Pattern introduced in 3.8 from typing import List, Union, Pattern import pytest -NOTEBOOKS_FOLDER = Path("tests/e2e_notebooks") +NOTEBOOKS_FOLDER = Path('tests/e2e_notebooks') TEST_CASES = [ - ("test_drop_empty_cells.ipynb", "test_drop_empty_cells_dontdrop.ipynb.expected", []), - ("test_drop_empty_cells.ipynb", "test_drop_empty_cells.ipynb.expected", ["--drop-empty-cells"]), - ("test_drop_tagged_cells.ipynb", "test_drop_tagged_cells_dontdrop.ipynb.expected", []), - ("test_drop_tagged_cells.ipynb", "test_drop_tagged_cells.ipynb.expected", ['--drop-tagged-cells=test']), - ("test_execution_timing.ipynb", "test_execution_timing.ipynb.expected", []), - ("test_max_size.ipynb", "test_max_size.ipynb.expected", ["--max-size", "50", "--keep-id"]), - ("test_max_size.ipynb", "test_max_size.ipynb.expected_sequential_id", ["--max-size", "50"]), - ("test_empty_metadata.ipynb", "test_empty_metadata.ipynb.expected", []), - ("test_metadata.ipynb", "test_metadata.ipynb.expected", []), - ("test_metadata.ipynb", "test_metadata_extra_keys.ipynb.expected", ["--extra-keys", "metadata.kernelspec metadata.language_info"]), - ("test_metadata.ipynb", "test_metadata_keep_count.ipynb.expected", ["--keep-count"]), - ("test_metadata.ipynb", "test_metadata_keep_output.ipynb.expected", ["--keep-output"]), - ("test_metadata.ipynb", "test_metadata_keep_output_keep_count.ipynb.expected", ["--keep-output", "--keep-count"]), - ("test_metadata_notebook.ipynb", "test_metadata_notebook.ipynb.expected", []), - ("test_keep_metadata_keys.ipynb", "test_keep_metadata_keys.ipynb.expected", ["--keep-metadata-keys", "cell.metadata.scrolled cell.metadata.collapsed metadata.a"]), - ("test_metadata_period.ipynb", "test_metadata_period.ipynb.expected", ["--extra-keys", "cell.metadata.application/vnd.databricks.v1+cell metadata.application/vnd.databricks.v1+notebook"]), - ("test_strip_init_cells.ipynb", "test_strip_init_cells.ipynb.expected", ["--strip-init-cells"]), - ("test_nbformat2.ipynb", "test_nbformat2.ipynb.expected", []), - ("test_nbformat45.ipynb", "test_nbformat45.ipynb.expected", ["--keep-id"]), - ("test_nbformat45.ipynb", "test_nbformat45.ipynb.expected_sequential_id", []), - ("test_missing_nbformat.ipynb", "test_missing_nbformat.ipynb.expected", []), - ("test_unicode.ipynb", "test_unicode.ipynb.expected", []), - ("test_widgets.ipynb", "test_widgets.ipynb.expected", []), - ("test_zeppelin.zpln", "test_zeppelin.zpln.expected", ["--mode", "zeppelin"]), + ('test_drop_empty_cells.ipynb', 'test_drop_empty_cells_dontdrop.ipynb.expected', []), + ('test_drop_empty_cells.ipynb', 'test_drop_empty_cells.ipynb.expected', ['--drop-empty-cells']), + ('test_drop_tagged_cells.ipynb', 'test_drop_tagged_cells_dontdrop.ipynb.expected', []), + ('test_drop_tagged_cells.ipynb', 'test_drop_tagged_cells.ipynb.expected', ['--drop-tagged-cells=test']), + ('test_execution_timing.ipynb', 'test_execution_timing.ipynb.expected', []), + ('test_max_size.ipynb', 'test_max_size.ipynb.expected', ['--max-size', '50', '--keep-id']), + ('test_max_size.ipynb', 'test_max_size.ipynb.expected_sequential_id', ['--max-size', '50']), + ('test_empty_metadata.ipynb', 'test_empty_metadata.ipynb.expected', []), + ('test_metadata.ipynb', 'test_metadata.ipynb.expected', []), + ( + 'test_metadata.ipynb', + 'test_metadata_extra_keys.ipynb.expected', + ['--extra-keys', 'metadata.kernelspec metadata.language_info'], + ), + ('test_metadata.ipynb', 'test_metadata_keep_count.ipynb.expected', ['--keep-count']), + ('test_metadata.ipynb', 'test_metadata_keep_output.ipynb.expected', ['--keep-output']), + ('test_metadata.ipynb', 'test_metadata_keep_output_keep_count.ipynb.expected', ['--keep-output', '--keep-count']), + ('test_metadata_notebook.ipynb', 'test_metadata_notebook.ipynb.expected', []), + ( + 'test_keep_metadata_keys.ipynb', + 'test_keep_metadata_keys.ipynb.expected', + ['--keep-metadata-keys', 'cell.metadata.scrolled cell.metadata.collapsed metadata.a'], + ), + ( + 'test_metadata_period.ipynb', + 'test_metadata_period.ipynb.expected', + [ + '--extra-keys', + 'cell.metadata.application/vnd.databricks.v1+cell metadata.application/vnd.databricks.v1+notebook', + ], + ), + ('test_strip_init_cells.ipynb', 'test_strip_init_cells.ipynb.expected', ['--strip-init-cells']), + ('test_nbformat2.ipynb', 'test_nbformat2.ipynb.expected', []), + ('test_nbformat45.ipynb', 'test_nbformat45.ipynb.expected', ['--keep-id']), + ('test_nbformat45.ipynb', 'test_nbformat45.ipynb.expected_sequential_id', []), + ('test_missing_nbformat.ipynb', 'test_missing_nbformat.ipynb.expected', []), + ('test_unicode.ipynb', 'test_unicode.ipynb.expected', []), + ('test_widgets.ipynb', 'test_widgets.ipynb.expected', []), + ('test_zeppelin.zpln', 'test_zeppelin.zpln.expected', ['--mode', 'zeppelin']), ] DRY_RUN_CASES = [ - ("test_metadata.ipynb", [], True), - ("test_zeppelin.zpln", ["--mode", "zeppelin"], True), - ("test_nochange.ipynb", [], False), + ('test_metadata.ipynb', [], True), + ('test_zeppelin.zpln', ['--mode', 'zeppelin'], True), + ('test_nochange.ipynb', [], False), ] ERR_OUTPUT_CASES = [ - ("test_metadata.ipynb", ["Ignoring invalid extra key `invalid`", "Ignoring invalid extra key `foo.invalid`"], ["--extra-keys", "invalid foo.invalid"]), - ("test_metadata_exception.ipynb", [re.compile(".*MetadataError: cell metadata contradicts tags: `keep_output` is false, but `keep_output` in tags")], []), - ("test_invalid_json.ipynb", ["No valid notebook detected on stdin"], []), + ( + 'test_metadata.ipynb', + ['Ignoring invalid extra key `invalid`', 'Ignoring invalid extra key `foo.invalid`'], + ['--extra-keys', 'invalid foo.invalid'], + ), + ( + 'test_metadata_exception.ipynb', + [ + re.compile( + '.*MetadataError: cell metadata contradicts tags: `keep_output` is false, but `keep_output` in tags' + ) + ], + [], + ), + ('test_invalid_json.ipynb', ['No valid notebook detected on stdin'], []), ] def nbstripout_exe(): - return os.environ.get("NBSTRIPOUT_EXE", "nbstripout") + return os.environ.get('NBSTRIPOUT_EXE', 'nbstripout') -@pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES) -@pytest.mark.parametrize("verify", (True, False)) +@pytest.mark.parametrize('input_file, expected_file, args', TEST_CASES) +@pytest.mark.parametrize('verify', (True, False)) def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str], verify: bool): - with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f: + with open(NOTEBOOKS_FOLDER / expected_file, mode='r') as f: expected = f.read() - with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + with open(NOTEBOOKS_FOLDER / input_file, mode='r') as f: input_ = f.read() - with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + with open(NOTEBOOKS_FOLDER / input_file, mode='r') as f: args = [nbstripout_exe()] + args if verify: - args.append("--verify") + args.append('--verify') pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True) output = pc.stdout @@ -77,32 +105,32 @@ def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str], assert pc.returncode == 0 -@pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES) -@pytest.mark.parametrize("verify", (True, False)) +@pytest.mark.parametrize('input_file, expected_file, args', TEST_CASES) +@pytest.mark.parametrize('verify', (True, False)) def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path, verify: bool): - with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f: + with open(NOTEBOOKS_FOLDER / expected_file, mode='r') as f: expected = f.read() p = tmp_path / input_file - with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + with open(NOTEBOOKS_FOLDER / input_file, mode='r') as f: p.write_text(f.read()) - with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + with open(NOTEBOOKS_FOLDER / input_file, mode='r') as f: input_ = f.read() args = [nbstripout_exe(), p] + args if verify: - args.append("--verify") + args.append('--verify') pc = run(args, stdout=PIPE, universal_newlines=True) output = pc.stdout.strip() if verify: if expected != input_: - assert "Dry run: would have stripped" in output + assert 'Dry run: would have stripped' in output assert pc.returncode == 1 # Since verify implies --dry-run, we make sure the file is not modified - with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + with open(NOTEBOOKS_FOLDER / input_file, mode='r') as f: output_ = f.read() assert output_ == input_ @@ -111,15 +139,15 @@ def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], t assert not pc.stdout and p.read_text() == expected -@pytest.mark.parametrize("input_file, extra_args, any_change", DRY_RUN_CASES) -@pytest.mark.parametrize("verify", (True, False)) -def test_dry_run_stdin(input_file: str, extra_args: List[str], any_change:bool, verify: bool): - expected = "Dry run: would have stripped input from stdin\n" +@pytest.mark.parametrize('input_file, extra_args, any_change', DRY_RUN_CASES) +@pytest.mark.parametrize('verify', (True, False)) +def test_dry_run_stdin(input_file: str, extra_args: List[str], any_change: bool, verify: bool): + expected = 'Dry run: would have stripped input from stdin\n' - with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: - args = [nbstripout_exe(), "--dry-run"] + extra_args + with open(NOTEBOOKS_FOLDER / input_file, mode='r') as f: + args = [nbstripout_exe(), '--dry-run'] + extra_args if verify: - args.append("--verify") + args.append('--verify') pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True) output = pc.stdout @@ -127,13 +155,17 @@ def test_dry_run_stdin(input_file: str, extra_args: List[str], any_change:bool, assert pc.returncode == (1 if verify and any_change else 0) -@pytest.mark.parametrize("input_file, extra_args, any_change", DRY_RUN_CASES) -@pytest.mark.parametrize("verify", (True, False)) +@pytest.mark.parametrize('input_file, extra_args, any_change', DRY_RUN_CASES) +@pytest.mark.parametrize('verify', (True, False)) def test_dry_run_args(input_file: str, extra_args: List[str], any_change: bool, verify: bool): - expected_regex = re.compile(f"Dry run: would have stripped .*[/\\\\]{input_file}\n") - args = [nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args + expected_regex = re.compile(f'Dry run: would have stripped .*[/\\\\]{input_file}\n') + args = [ + nbstripout_exe(), + str(NOTEBOOKS_FOLDER / input_file), + '--dry-run', + ] + extra_args if verify: - args.append("--verify") + args.append('--verify') pc = run(args, stdout=PIPE, universal_newlines=True) output = pc.stdout @@ -141,10 +173,10 @@ def test_dry_run_args(input_file: str, extra_args: List[str], any_change: bool, assert pc.returncode == (1 if verify and any_change else 0) -@pytest.mark.parametrize("input_file, expected_errs, extra_args", ERR_OUTPUT_CASES) +@pytest.mark.parametrize('input_file, expected_errs, extra_args', ERR_OUTPUT_CASES) def test_make_errors(input_file: str, expected_errs: List[Union[str, Pattern]], extra_args: List[str]): - with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: - pc = run([nbstripout_exe(), "--dry-run"] + extra_args, stdin=f, stderr=PIPE, universal_newlines=True) + with open(NOTEBOOKS_FOLDER / input_file, mode='r') as f: + pc = run([nbstripout_exe(), '--dry-run'] + extra_args, stdin=f, stderr=PIPE, universal_newlines=True) err_output = pc.stderr for e in expected_errs: diff --git a/tests/test_git_integration.py b/tests/test_git_integration.py index 1a85512..6d4c62c 100644 --- a/tests/test_git_integration.py +++ b/tests/test_git_integration.py @@ -6,7 +6,7 @@ import pytest # fix this before pytester.chdir() happens -NOTEBOOKS_FOLDER = Path("tests").absolute() +NOTEBOOKS_FOLDER = Path('tests').absolute() def test_install(pytester: pytest.Pytester): @@ -15,44 +15,40 @@ def test_install(pytester: pytest.Pytester): pytester.run('nbstripout', '--install') assert pytester.run('nbstripout', '--is-installed').ret == 0 - with open(".git/info/attributes", "r") as f: + with open('.git/info/attributes', 'r') as f: attr_lines = f.readlines() - assert "*.ipynb filter=nbstripout\n" in attr_lines - assert "*.zpln filter=nbstripout\n" in attr_lines - assert "*.ipynb diff=ipynb\n" in attr_lines + assert '*.ipynb filter=nbstripout\n' in attr_lines + assert '*.zpln filter=nbstripout\n' in attr_lines + assert '*.ipynb diff=ipynb\n' in attr_lines config = ConfigParser() - config.read(".git/config") - assert re.match(r".*python.* -m nbstripout", config['filter "nbstripout"']["clean"]) - assert config['filter "nbstripout"']["required"] == "true" - assert config['filter "nbstripout"']["smudge"] == "cat" - assert re.match(r".*python.* -m nbstripout -t", config['diff "ipynb"']["textconv"]) + config.read('.git/config') + assert re.match(r'.*python.* -m nbstripout', config['filter "nbstripout"']['clean']) + assert config['filter "nbstripout"']['required'] == 'true' + assert config['filter "nbstripout"']['smudge'] == 'cat' + assert re.match(r'.*python.* -m nbstripout -t', config['diff "ipynb"']['textconv']) def test_install_different_python(pytester: pytest.Pytester): - pytester.run("git", "init") - assert pytester.run("nbstripout", "--is-installed").ret == 1 - pytester.run("nbstripout", "--install", "--python", "DIFFERENTPYTHON") - assert pytester.run("nbstripout", "--is-installed").ret == 0 + pytester.run('git', 'init') + assert pytester.run('nbstripout', '--is-installed').ret == 1 + pytester.run('nbstripout', '--install', '--python', 'DIFFERENTPYTHON') + assert pytester.run('nbstripout', '--is-installed').ret == 0 config = ConfigParser() - config.read(".git/config") - assert re.match( - r".*DIFFERENTPYTHON.* -m nbstripout", config['filter "nbstripout"']["clean"] - ) - assert sys.executable not in config['filter "nbstripout"']["clean"] - assert config['filter "nbstripout"']["required"] == "true" - assert config['filter "nbstripout"']["smudge"] == "cat" - assert re.match( - r".*DIFFERENTPYTHON.* -m nbstripout -t", config['diff "ipynb"']["textconv"] - ) - assert sys.executable not in config['diff "ipynb"']["textconv"] + config.read('.git/config') + assert re.match(r'.*DIFFERENTPYTHON.* -m nbstripout', config['filter "nbstripout"']['clean']) + assert sys.executable not in config['filter "nbstripout"']['clean'] + assert config['filter "nbstripout"']['required'] == 'true' + assert config['filter "nbstripout"']['smudge'] == 'cat' + assert re.match(r'.*DIFFERENTPYTHON.* -m nbstripout -t', config['diff "ipynb"']['textconv']) + assert sys.executable not in config['diff "ipynb"']['textconv'] def test_uninstall(pytester: pytest.Pytester): pytester.run('git', 'init') # add extra filter at the start, so we can check we don't remove it - pytester.path.joinpath(".git/info/attributes").write_text("*.txt text") + pytester.path.joinpath('.git/info/attributes').write_text('*.txt text') # do the install and verify pytester.run('nbstripout', '--install') @@ -62,15 +58,15 @@ def test_uninstall(pytester: pytest.Pytester): pytester.run('nbstripout', '--uninstall') assert pytester.run('nbstripout', '--is-installed').ret == 1 - with open(".git/info/attributes", "r") as f: + with open('.git/info/attributes', 'r') as f: attr_lines = f.readlines() - assert "*.txt text\n" in attr_lines # still there and not removed + assert '*.txt text\n' in attr_lines # still there and not removed assert len(attr_lines) == 1 config = ConfigParser() - config.read(".git/config") + config.read('.git/config') assert 'filter "nbstripout"' not in config - assert 'diff "ipynb"'not in config + assert 'diff "ipynb"' not in config def test_uninstall_leave_extrakeys(pytester: pytest.Pytester): @@ -107,7 +103,8 @@ def test_status(pytester: pytest.Pytester): pytester.run('nbstripout', '--install') r = pytester.run('nbstripout', '--status') assert r.ret == 0 - r.stdout.re_match_lines(r"""nbstripout is installed in repository .* + r.stdout.re_match_lines( + r"""nbstripout is installed in repository .* \s* Filter: clean = .* -m nbstripout @@ -120,7 +117,8 @@ def test_status(pytester: pytest.Pytester): \s* Diff Attributes: \*.ipynb: diff: ipynb -""".splitlines()) +""".splitlines() + ) # uninstall and verify pytester.run('nbstripout', '--uninstall') @@ -134,8 +132,18 @@ def test_git_diff_nodiff(pytester: pytest.Pytester): pytester.run('git', 'config', '--local', 'filter.nbstripout.extrakeys', ' ') pytester.run('nbstripout', '--install') - r = pytester.run('git', 'diff', '--no-index', '--no-ext-diff', '--unified=0', '--exit-code', '-a', '--no-prefix', - NOTEBOOKS_FOLDER / "test_diff.ipynb", NOTEBOOKS_FOLDER / "test_diff_output.ipynb") + r = pytester.run( + 'git', + 'diff', + '--no-index', + '--no-ext-diff', + '--unified=0', + '--exit-code', + '-a', + '--no-prefix', + NOTEBOOKS_FOLDER / 'test_diff.ipynb', + NOTEBOOKS_FOLDER / 'test_diff_output.ipynb', + ) assert r.ret == 0 assert not r.outlines @@ -145,9 +153,16 @@ def test_git_diff_diff(pytester: pytest.Pytester): pytester.run('git', 'config', '--local', 'filter.nbstripout.extrakeys', ' ') pytester.run('nbstripout', '--install') - r = pytester.run('git', 'diff', '--no-index', NOTEBOOKS_FOLDER / "test_diff.ipynb", NOTEBOOKS_FOLDER / "test_diff_different_extrakeys.ipynb") + r = pytester.run( + 'git', + 'diff', + '--no-index', + NOTEBOOKS_FOLDER / 'test_diff.ipynb', + NOTEBOOKS_FOLDER / 'test_diff_different_extrakeys.ipynb', + ) assert r.ret == 1 - r.stdout.fnmatch_lines(r"""index* + r.stdout.fnmatch_lines( + r"""index* --- *test_diff.ipynb* +++ *test_diff_different_extrakeys.ipynb* @@ -6,15 +6,14 @@ @@ -168,18 +183,28 @@ def test_git_diff_diff(pytester: pytest.Pytester): }, "language_info": { "codemirror_mode": { -""".splitlines()) +""".splitlines() + ) assert len(r.outlines) == 22 # 21 lines + new line at end def test_git_diff_extrakeys(pytester: pytest.Pytester): pytester.run('git', 'init') - pytester.run('git', 'config', '--local', 'filter.nbstripout.extrakeys', 'cell.metadata.collapsed metadata.kernelspec.name') + pytester.run( + 'git', 'config', '--local', 'filter.nbstripout.extrakeys', 'cell.metadata.collapsed metadata.kernelspec.name' + ) pytester.run('nbstripout', '--install') - r = pytester.run('git', 'diff', '--no-index', NOTEBOOKS_FOLDER / "test_diff.ipynb", NOTEBOOKS_FOLDER / "test_diff_different_extrakeys.ipynb") + r = pytester.run( + 'git', + 'diff', + '--no-index', + NOTEBOOKS_FOLDER / 'test_diff.ipynb', + NOTEBOOKS_FOLDER / 'test_diff_different_extrakeys.ipynb', + ) assert r.ret == 1 - r.stdout.fnmatch_lines(r"""index* + r.stdout.fnmatch_lines( + r"""index* --- *test_diff.ipynb* +++ *test_diff_different_extrakeys.ipynb* @@ -6,7 +6,7 @@ @@ -191,18 +216,28 @@ def test_git_diff_extrakeys(pytester: pytest.Pytester): ] } ], -""".splitlines()) +""".splitlines() + ) assert len(r.outlines) == 13 # 12 lines + new line at end def test_git_diff_keepmetadatakeys(pytester: pytest.Pytester): pytester.run('git', 'init') - pytester.run('git', 'config', '--local', 'filter.nbstripout.keepmetadatakeys', 'cell.metadata.scrolled metadata.foo.bar') + pytester.run( + 'git', 'config', '--local', 'filter.nbstripout.keepmetadatakeys', 'cell.metadata.scrolled metadata.foo.bar' + ) pytester.run('nbstripout', '--install') - r = pytester.run('git', 'diff', '--no-index', NOTEBOOKS_FOLDER / "test_diff.ipynb", NOTEBOOKS_FOLDER / "test_diff_different_extrakeys.ipynb") + r = pytester.run( + 'git', + 'diff', + '--no-index', + NOTEBOOKS_FOLDER / 'test_diff.ipynb', + NOTEBOOKS_FOLDER / 'test_diff_different_extrakeys.ipynb', + ) assert r.ret == 1 - r.stdout.fnmatch_lines(r"""index* + r.stdout.fnmatch_lines( + r"""index* --- *test_diff.ipynb* +++ *test_diff_different_extrakeys.ipynb* @@ -3,20 +3,17 @@ @@ -220,5 +255,6 @@ def test_git_diff_keepmetadatakeys(pytester: pytest.Pytester): ] } ], -""".splitlines()) +""".splitlines() + ) assert len(r.outlines) == 28 # 12 lines + new line at end diff --git a/tests/test_keep_output_tags.py b/tests/test_keep_output_tags.py index 4243a82..b29d361 100644 --- a/tests/test_keep_output_tags.py +++ b/tests/test_keep_output_tags.py @@ -27,7 +27,7 @@ def test_cells(orig_nb): nb_stripped = strip_output(nb_stripped, keep_output=None, keep_count=None, keep_id=None) for i, cell in enumerate(nb_stripped.cells): if cell.cell_type == 'code' and cell.source: - match = re.match(r"\s*#\s*(output|no_output)", cell.source) + match = re.match(r'\s*#\s*(output|no_output)', cell.source) if match: # original cell should have had output. # If not, there's a problem with the test fixture