Skip to content

Commit

Permalink
Add --verify flag, closes #153
Browse files Browse the repository at this point in the history
This mode is similar to `--dry-run`, but exits with status 1 if any change would have been made to any of the affected files.

---------

Co-authored-by: Florian Rathgeber <[email protected]>
  • Loading branch information
jspaezp and kynan authored Nov 3, 2024
1 parent 902e9ea commit b8fd98c
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 16 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ Do a dry run and only list which files would have been stripped:

nbstripout --dry-run FILE.ipynb [FILE2.ipynb ...]

or

Do a verification run, which works like a dry run but exits with a
non-zero status if any files would have been stripped:

nbstripout --verify FILE.ipynb [FILE2.ipynb ...]

Operate on all `.ipynb` files in the current directory and subdirectories
recursively:

Expand Down
33 changes: 28 additions & 5 deletions nbstripout/_nbstripout.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@

from argparse import ArgumentParser, RawDescriptionHelpFormatter
import collections
import copy
import io
import json
from os import devnull, environ, makedirs, path
Expand Down Expand Up @@ -331,28 +332,38 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False):
return 1

def process_notebook(input_stream, output_stream, args, extra_keys, filename='input from stdin'):
any_change = False
if args.mode == 'zeppelin':
nb = json.load(input_stream, object_pairs_hook=collections.OrderedDict)
nb_orig = copy.deepcopy(nb)
nb_stripped = strip_zeppelin_output(nb)
if nb_orig != nb_stripped:
any_change = True

if args.dry_run:
output_stream.write(f'Dry run: would have stripped {filename}\n')
return
return any_change
if output_stream.seekable():
output_stream.seek(0)
output_stream.truncate()
json.dump(nb_stripped, output_stream, indent=2)
output_stream.write('\n')
output_stream.flush()
return
return any_change

with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
nb = nbformat.read(input_stream, as_version=nbformat.NO_CONVERT)

nb_orig = copy.deepcopy(nb)
nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id,
extra_keys, args.drop_empty_cells,
args.drop_tagged_cells.split(), args.strip_init_cells,
_parse_size(args.max_size))

if nb_orig != nb:
any_change = True

if args.dry_run:
output_stream.write(f'Dry run: would have stripped {filename}\n')
else:
Expand All @@ -363,7 +374,7 @@ def process_notebook(input_stream, output_stream, args, extra_keys, filename='in
warnings.simplefilter("ignore", category=UserWarning)
nbformat.write(nb, output_stream)
output_stream.flush()

return any_change

def main():
parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter)
Expand All @@ -383,6 +394,8 @@ def main():
'repository and configuration summary if installed')
task.add_argument('--version', action='store_true',
help='Print version')
parser.add_argument("--verify", action="store_true",
help="Return a non-zero exit code if any files were changed, Implies --dry-run")
parser.add_argument('--keep-count', action='store_true',
help='Do not strip the execution count/prompt number')
parser.add_argument('--keep-output', action='store_true',
Expand Down Expand Up @@ -428,6 +441,9 @@ def main():
args = parser.parse_args()
git_config = ['git', 'config']

if args.verify and not args.dry_run:
args.dry_run = True

if args._system:
git_config.append('--system')
install_location = INSTALL_LOCATION_SYSTEM
Expand Down Expand Up @@ -483,14 +499,17 @@ def main():
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') if sys.stdin else None
output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='')

any_change = False
for filename in args.files:
if not (args.force or filename.endswith('.ipynb') or filename.endswith('.zpln')):
continue

try:
with io.open(filename, 'r+', encoding='utf8', newline='') as f:
out = output_stream if args.textconv or args.dry_run else f
process_notebook(f, out, args, extra_keys, filename)
if process_notebook(f, out, args, extra_keys, filename):
any_change = True

except nbformat.reader.NotJSONError:
print(f"No valid notebook detected in '{filename}'", file=sys.stderr)
raise SystemExit(1)
Expand All @@ -504,7 +523,11 @@ def main():

if not args.files and input_stream:
try:
process_notebook(input_stream, output_stream, args, extra_keys)
if process_notebook(input_stream, output_stream, args, extra_keys):
any_change = True
except nbformat.reader.NotJSONError:
print('No valid notebook detected on stdin', file=sys.stderr)
raise SystemExit(1)

if args.verify and any_change:
raise SystemExit(1)
65 changes: 54 additions & 11 deletions tests/test_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,49 +53,92 @@ def nbstripout_exe():


@pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES)
def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str]):
@pytest.mark.parametrize("verify", (True, False))
def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str], verify: bool):
with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f:
expected = f.read()

with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
pc = run([nbstripout_exe()] + args, stdin=f, stdout=PIPE, universal_newlines=True)
input_ = f.read()

with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
args = [nbstripout_exe()] + args
if verify:
args.append("--verify")
pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True)
output = pc.stdout

assert output == expected
if verify:
# When using stdin, the dry run flag is disregarded.
assert pc.returncode == (1 if input_ != expected else 0)
else:
assert output == expected
assert pc.returncode == 0


@pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES)
def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path):
@pytest.mark.parametrize("verify", (True, False))
def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path, verify: bool):
with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f:
expected = f.read()

p = tmp_path / input_file
with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
p.write_text(f.read())
pc = run([nbstripout_exe(), p] + args, stdout=PIPE, universal_newlines=True)

assert not pc.stdout and p.read_text() == expected
with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
input_ = f.read()

args = [nbstripout_exe(), p] + args
if verify:
args.append("--verify")
pc = run(args, stdout=PIPE, universal_newlines=True)

output = pc.stdout.strip()
if verify:
if expected != input_:
assert "Dry run: would have stripped" in output
assert pc.returncode == 1

# Since verify implies --dry-run, we make sure the file is not modified
with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
output_ = f.read()

assert output_ == input_
else:
assert pc.returncode == 0
assert not pc.stdout and p.read_text() == expected


@pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES)
def test_dry_run_stdin(input_file: str, extra_args: List[str]):
@pytest.mark.parametrize("verify", (True, False))
def test_dry_run_stdin(input_file: str, extra_args: List[str], verify: bool):
expected = "Dry run: would have stripped input from stdin\n"

with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
pc = run([nbstripout_exe(), "--dry-run"] + extra_args, stdin=f, stdout=PIPE, universal_newlines=True)
args = [nbstripout_exe(), "--dry-run"] + extra_args
if verify:
args.append("--verify")
pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True)
output = pc.stdout

assert output == expected
assert pc.returncode == (1 if verify else 0)


@pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES)
def test_dry_run_args(input_file: str, extra_args: List[str]):
@pytest.mark.parametrize("verify", (True, False))
def test_dry_run_args(input_file: str, extra_args: List[str], verify: bool):
expected_regex = re.compile(f"Dry run: would have stripped .*[/\\\\]{input_file}\n")

pc = run([nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args, stdout=PIPE, universal_newlines=True)
args = [nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args
if verify:
args.append("--verify")
pc = run(args, stdout=PIPE, universal_newlines=True)
output = pc.stdout

assert expected_regex.match(output)
if verify:
assert pc.returncode == 1


@pytest.mark.parametrize("input_file, expected_errs, extra_args", ERR_OUTPUT_CASES)
Expand Down

0 comments on commit b8fd98c

Please sign in to comment.