From 2310b0e7310f9b7f620d248ce4d23992011b9c7c Mon Sep 17 00:00:00 2001 From: emdavis02 Date: Mon, 22 Jul 2024 10:24:10 -0700 Subject: [PATCH 1/8] initial changes --- sleap/nn/inference.py | 84 +++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/sleap/nn/inference.py b/sleap/nn/inference.py index 7f9e91ec9..3ba604d8d 100644 --- a/sleap/nn/inference.py +++ b/sleap/nn/inference.py @@ -5285,8 +5285,8 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: args: Parsed CLI namespace. Returns: - A tuple of `(provider, data_path)` with the data `Provider` and path to the data - that was specified in the args. + `(provider_list, data_path_list, output_path_list)` with the data `Provider`, path to the data + that was specified in the args, and list out output paths if a csv file was inputed. """ # Figure out which input path to use. @@ -5299,51 +5299,75 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: ) data_path_obj = Path(data_path) + + # Set output_path_list to None as a default to return later + output_path_list = None # Check that input value is valid if not data_path_obj.exists(): raise ValueError("Path to data_path does not exist") + + elif data_path_obj.suffix.lower() == ".csv": + try: + # Read the CSV file + df = pandas.read_csv(data_path) + + # Check if the 'data_path' and 'output_path' columns exist + if "data_path" in df.columns: + raw_data_path_list = df["data_path"].tolist() + else: + print("Column 'data_path' does not exist in data_path csv file.") + if "output_path" in df.columns: + output_path_list = df["output_path"].tolist() + + except FileNotFoundError as e: + raise ValueError(f"CSV file not found: {data_path}") from e + except pandas.errors.EmptyDataError as e: + raise ValueError(f"CSV file is empty: {data_path}") from e + except pandas.errors.ParserError as e: + raise ValueError(f"Error parsing CSV file: {data_path}") from e # Check for 
multiple video inputs # Compile file(s) into a list for later itteration - if data_path_obj.is_dir(): - data_path_list = [] + elif data_path_obj.is_dir(): + raw_data_path_list = [] for file_path in data_path_obj.iterdir(): if file_path.is_file(): - data_path_list.append(Path(file_path)) + raw_data_path_list.append(Path(file_path)) + elif data_path_obj.is_file(): - data_path_list = [data_path_obj] + raw_data_path_list = [data_path_obj] # Provider list to accomodate multiple video inputs - output_provider_list = [] - output_data_path_list = [] - for file_path in data_path_list: + provider_list = [] + data_path_list = [] + for file_path in raw_data_path_list: # Create a provider for each file - if file_path.as_posix().endswith(".slp") and len(data_path_list) > 1: + if file_path.as_posix().endswith(".slp") and len(raw_data_path_list) > 1: print(f"slp file skipped: {file_path.as_posix()}") elif file_path.as_posix().endswith(".slp"): labels = sleap.load_file(file_path.as_posix()) if args.only_labeled_frames: - output_provider_list.append( + provider_list.append( LabelsReader.from_user_labeled_frames(labels) ) elif args.only_suggested_frames: - output_provider_list.append( + provider_list.append( LabelsReader.from_unlabeled_suggestions(labels) ) elif getattr(args, "video.index") != "": - output_provider_list.append( + provider_list.append( VideoReader( video=labels.videos[int(getattr(args, "video.index"))], example_indices=frame_list(args.frames), ) ) else: - output_provider_list.append(LabelsReader(labels)) + provider_list.append(LabelsReader(labels)) - output_data_path_list.append(file_path) + data_path_list.append(file_path) else: try: @@ -5351,7 +5375,7 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: dataset=vars(args).get("video.dataset"), input_format=vars(args).get("video.input_format"), ) - output_provider_list.append( + provider_list.append( VideoReader.from_filepath( filename=file_path.as_posix(), 
example_indices=frame_list(args.frames), @@ -5359,12 +5383,12 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: ) ) print(f"Video: {file_path.as_posix()}") - output_data_path_list.append(file_path) + data_path_list.append(file_path) # TODO: Clean this up. except Exception: print(f"Error reading file: {file_path.as_posix()}") - return output_provider_list, output_data_path_list + return provider_list, data_path_list, output_path_list def _make_predictor_from_cli(args: argparse.Namespace) -> Predictor: @@ -5496,10 +5520,12 @@ def main(args: Optional[list] = None): print() # Setup data loader. - provider_list, data_path_list = _make_provider_from_cli(args) - - output_path = args.output + provider_list, data_path_list, output_path_list = _make_provider_from_cli(args) +# if output_path has not been extracted from a csv file yet + if output_path_list is None: + output_path = args.output + # check if output_path is valid before running inference if ( output_path is not None @@ -5520,7 +5546,7 @@ def main(args: Optional[list] = None): if args.models is not None: # Run inference on all files inputed - for data_path, provider in zip(data_path_list, provider_list): + for i, (data_path, provider) in enumerate(zip(data_path_list, provider_list)): # Setup models. 
data_path_obj = Path(data_path) predictor = _make_predictor_from_cli(args) @@ -5531,11 +5557,17 @@ def main(args: Optional[list] = None): # if output path was not provided, create an output path if output_path is None: - output_path = f"{data_path.as_posix()}.predictions.slp" - output_path_obj = Path(output_path) + output_path = data_path + ".predictions.slp" + # if output path was not provided, create an output path + if output_path_list is not None: + output_path = output_path_list[i] + + elif output_path is None: + output_path = f"{data_path.as_posix()}.predictions.slp" + output_path_obj = Path(output_path) - else: - output_path_obj = Path(output_path) + else: + output_path_obj = Path(output_path) # if output_path was provided and multiple inputs were provided, create a directory to store outputs if len(data_path_list) > 1: From 4e873b8c42161261c1435874d487325429e040ad Mon Sep 17 00:00:00 2001 From: emdavis02 Date: Mon, 22 Jul 2024 12:15:14 -0700 Subject: [PATCH 2/8] csv support and test case --- sleap/nn/inference.py | 110 ++++++++++++++++++++----------------- tests/nn/test_inference.py | 54 ++++++++++++++++++ 2 files changed, 113 insertions(+), 51 deletions(-) diff --git a/sleap/nn/inference.py b/sleap/nn/inference.py index 3ba604d8d..a76ccd7ce 100644 --- a/sleap/nn/inference.py +++ b/sleap/nn/inference.py @@ -33,6 +33,7 @@ import atexit import subprocess import rich.progress +import pandas from rich.pretty import pprint from collections import deque import json @@ -5299,44 +5300,57 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: ) data_path_obj = Path(data_path) - + # Set output_path_list to None as a default to return later output_path_list = None # Check that input value is valid if not data_path_obj.exists(): raise ValueError("Path to data_path does not exist") - - elif data_path_obj.suffix.lower() == ".csv": - try: - # Read the CSV file - df = pandas.read_csv(data_path) - # Check if the 'data_path' and 'output_path' 
columns exist - if "data_path" in df.columns: - raw_data_path_list = df["data_path"].tolist() - else: - print("Column 'data_path' does not exist in data_path csv file.") - if "output_path" in df.columns: - output_path_list = df["output_path"].tolist() + elif data_path_obj.is_file(): + # If the file is a CSV file, check for data_paths and output_paths + if data_path_obj.suffix.lower() == ".csv": + try: + # Read the CSV file + df = pandas.read_csv(data_path) + + # collect data_paths from column + if "data_path" in df.columns: + raw_data_path_list = df["data_path"].tolist() + else: + raise ValueError( + "Column 'data_path' does not exist in the CSV file." + ) + + # optional output_path column to specify multiple output_paths + if "output_path" in df.columns: + output_path_list = df["output_path"].tolist() + + except FileNotFoundError as e: + raise ValueError(f"CSV file not found: {data_path}") from e + except pandas.errors.EmptyDataError as e: + raise ValueError(f"CSV file is empty: {data_path}") from e + except pandas.errors.ParserError as e: + raise ValueError(f"Error parsing CSV file: {data_path}") from e + + # If the file is a text file, collect data_paths + elif data_path_obj.suffix.lower() == ".txt": + with open(data_path_obj, "r") as file: + raw_data_path_list = [line.strip() for line in file.readlines()] - except FileNotFoundError as e: - raise ValueError(f"CSV file not found: {data_path}") from e - except pandas.errors.EmptyDataError as e: - raise ValueError(f"CSV file is empty: {data_path}") from e - except pandas.errors.ParserError as e: - raise ValueError(f"Error parsing CSV file: {data_path}") from e + # Else, the file is a single data_path + else: + raw_data_path_list = [data_path_obj] + + raw_data_path_list = [Path(p) for p in raw_data_path_list] # Check for multiple video inputs - # Compile file(s) into a list for later itteration + # Compile file(s) into a list for later iteration elif data_path_obj.is_dir(): - raw_data_path_list = [] - for file_path 
in data_path_obj.iterdir(): - if file_path.is_file(): - raw_data_path_list.append(Path(file_path)) - - elif data_path_obj.is_file(): - raw_data_path_list = [data_path_obj] + raw_data_path_list = [ + file_path for file_path in data_path_obj.iterdir() if file_path.is_file() + ] # Provider list to accomodate multiple video inputs provider_list = [] @@ -5350,13 +5364,9 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: labels = sleap.load_file(file_path.as_posix()) if args.only_labeled_frames: - provider_list.append( - LabelsReader.from_user_labeled_frames(labels) - ) + provider_list.append(LabelsReader.from_user_labeled_frames(labels)) elif args.only_suggested_frames: - provider_list.append( - LabelsReader.from_unlabeled_suggestions(labels) - ) + provider_list.append(LabelsReader.from_unlabeled_suggestions(labels)) elif getattr(args, "video.index") != "": provider_list.append( VideoReader( @@ -5522,10 +5532,12 @@ def main(args: Optional[list] = None): # Setup data loader. 
provider_list, data_path_list, output_path_list = _make_provider_from_cli(args) -# if output_path has not been extracted from a csv file yet + output_path = None + + # if output_path has not been extracted from a csv file yet if output_path_list is None: output_path = args.output - + # check if output_path is valid before running inference if ( output_path is not None @@ -5557,27 +5569,23 @@ def main(args: Optional[list] = None): # if output path was not provided, create an output path if output_path is None: - output_path = data_path + ".predictions.slp" # if output path was not provided, create an output path - if output_path_list is not None: + if output_path_list: output_path = output_path_list[i] - - elif output_path is None: - output_path = f"{data_path.as_posix()}.predictions.slp" - output_path_obj = Path(output_path) else: - output_path_obj = Path(output_path) + output_path = f"{data_path.as_posix()}.predictions.slp" - # if output_path was provided and multiple inputs were provided, create a directory to store outputs - if len(data_path_list) > 1: - output_path = ( - output_path_obj - / data_path_obj.with_suffix(".predictions.slp").name - ) - output_path_obj = Path(output_path) - # Create the containing directory if needed. - output_path_obj.parent.mkdir(exist_ok=True, parents=True) + output_path_obj = Path(output_path) + + # if output_path was provided and multiple inputs were provided, create a directory to store outputs + if len(data_path_list) > 1: + output_path = ( + output_path_obj / data_path_obj.with_suffix(".predictions.slp").name + ) + output_path_obj = Path(output_path) + # Create the containing directory if needed. 
+ output_path_obj.parent.mkdir(exist_ok=True, parents=True) labels_pr.provenance["model_paths"] = predictor.model_paths labels_pr.provenance["predictor"] = type(predictor).__name__ diff --git a/tests/nn/test_inference.py b/tests/nn/test_inference.py index f99f136ab..0678b1eee 100644 --- a/tests/nn/test_inference.py +++ b/tests/nn/test_inference.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import cast import shutil +import csv import numpy as np import pytest @@ -1747,6 +1748,59 @@ def test_sleap_track_invalid_input( sleap_track(args=args) +def test_sleap_track_csv_input( + min_centroid_model_path: str, + min_centered_instance_model_path: str, + centered_pair_vid_path, + tmpdir, +): + + # Create temporary directory with the structured video files + slp_path = Path(tmpdir.mkdir("mp4_directory")) + + # Copy and paste the video into the temp dir multiple times + num_copies = 3 + file_paths = [] + for i in range(num_copies): + # Construct the destination path with a unique name + dest_path = slp_path / f"centered_pair_vid_copy_{i}.mp4" + shutil.copy(centered_pair_vid_path, dest_path) + file_paths.append(dest_path) + + # Generate output paths for each data_path + output_paths = [ + file_path.with_suffix(".TESTpredictions.slp") for file_path in file_paths + ] + + # Create a CSV file with the file paths + csv_file_path = slp_path / "file_paths.csv" + with open(csv_file_path, mode="w", newline="") as csv_file: + csv_writer = csv.writer(csv_file) + csv_writer.writerow(["data_path", "output_path"]) + for data_path, output_path in zip(file_paths, output_paths): + csv_writer.writerow([data_path, output_path]) + + slp_path_obj = Path(slp_path) + + # Create sleap-track command + args = ( + f"{csv_file_path} --model {min_centroid_model_path} " + f"--tracking.tracker simple " + f"--model {min_centered_instance_model_path} --video.index 0 --frames 1-3 --cpu" + ).split() + + slp_path_list = [file for file in slp_path_obj.iterdir() if file.is_file()] + + # Run inference + 
sleap_track(args=args) + + # Assert predictions file exists + for file_path in slp_path_list: + if file_path.suffix == ".mp4": + expected_output_file = file_path.with_suffix(".TESTpredictions.slp") + assert Path(expected_output_file).exists() + + def test_flow_tracker(centered_pair_predictions: Labels, tmpdir): """Test flow tracker instances are pruned.""" labels: Labels = centered_pair_predictions From f41ea2a58eda75156969eb0ba416c900c6948137 Mon Sep 17 00:00:00 2001 From: emdavis02 Date: Mon, 22 Jul 2024 15:13:51 -0700 Subject: [PATCH 3/8] increased code coverage --- sleap/nn/inference.py | 47 ++++++++-------- tests/nn/test_inference.py | 108 +++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 23 deletions(-) diff --git a/sleap/nn/inference.py b/sleap/nn/inference.py index a76ccd7ce..8b3ae9eaa 100644 --- a/sleap/nn/inference.py +++ b/sleap/nn/inference.py @@ -5320,28 +5320,28 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: raw_data_path_list = df["data_path"].tolist() else: raise ValueError( - "Column 'data_path' does not exist in the CSV file." + f"Column 'data_path' does not exist in the CSV file: {data_path}" ) # optional output_path column to specify multiple output_paths if "output_path" in df.columns: output_path_list = df["output_path"].tolist() - except FileNotFoundError as e: - raise ValueError(f"CSV file not found: {data_path}") from e except pandas.errors.EmptyDataError as e: - raise ValueError(f"CSV file is empty: {data_path}") from e - except pandas.errors.ParserError as e: - raise ValueError(f"Error parsing CSV file: {data_path}") from e + raise ValueError(f"CSV file is empty: {data_path}. 
Error: {e}") from e + # If the file is a text file, collect data_paths elif data_path_obj.suffix.lower() == ".txt": - with open(data_path_obj, "r") as file: - raw_data_path_list = [line.strip() for line in file.readlines()] - - # Else, the file is a single data_path + try: + with open(data_path_obj, "r") as file: + raw_data_path_list = [line.strip() for line in file.readlines()] + except Exception as e: + raise ValueError( + f"Error reading text file: {data_path}. Error: {e}" + ) from e else: - raw_data_path_list = [data_path_obj] + raw_data_path_list = [str(data_path_obj)] raw_data_path_list = [Path(p) for p in raw_data_path_list] @@ -5535,18 +5535,18 @@ def main(args: Optional[list] = None): output_path = None # if output_path has not been extracted from a csv file yet - if output_path_list is None: + if output_path_list is None and args.output is not None: output_path = args.output + output_path_obj = Path(output_path) - # check if output_path is valid before running inference - if ( - output_path is not None - and Path(output_path).is_file() - and len(data_path_list) > 1 - ): - raise ValueError( - "output_path argument must be a directory if multiple video inputs are given" - ) + # check if output_path is valid before running inference + if ( + Path(output_path).is_file() + and len(data_path_list) > 1 + ): + raise ValueError( + "output_path argument must be a directory if multiple video inputs are given" + ) # Setup tracker. 
tracker = _make_tracker_from_cli(args) @@ -5576,10 +5576,11 @@ def main(args: Optional[list] = None): else: output_path = f"{data_path.as_posix()}.predictions.slp" - output_path_obj = Path(output_path) + output_path_obj = Path(output_path) # if output_path was provided and multiple inputs were provided, create a directory to store outputs - if len(data_path_list) > 1: + elif len(data_path_list) > 1: + output_path_obj = Path(output_path) output_path = ( output_path_obj / data_path_obj.with_suffix(".predictions.slp").name ) diff --git a/tests/nn/test_inference.py b/tests/nn/test_inference.py index 0678b1eee..646049256 100644 --- a/tests/nn/test_inference.py +++ b/tests/nn/test_inference.py @@ -8,6 +8,7 @@ import numpy as np import pytest +import pandas import tensorflow as tf import tensorflow_hub as hub from numpy.testing import assert_array_equal, assert_allclose @@ -1686,12 +1687,17 @@ def test_sleap_track_output_mult( sleap_track(args=args) slp_path = Path(slp_path) + print(f"Contents of the directory {slp_path_obj}:") + for file in slp_path_obj.iterdir(): + print(file) + # Check if there are any files in the directory for file_path in slp_path_list: if file_path.suffix == ".mp4": expected_output_file = output_path_obj / ( file_path.stem + ".predictions.slp" ) + print(f"expected output: {expected_output_file}") assert Path(expected_output_file).exists() @@ -1747,6 +1753,20 @@ def test_sleap_track_invalid_input( with pytest.raises(ValueError): sleap_track(args=args) + # Test with a non-existent path + slp_path = "/path/to/nonexistent/file.mp4" + + # Create sleap-track command for non-existent path + args = ( + f"{slp_path} --model {min_centroid_model_path} " + f"--tracking.tracker simple " + f"--model {min_centered_instance_model_path} --video.index 0 --frames 1-3 --cpu" + ).split() + + # Run inference and expect a ValueError for non-existent path + with pytest.raises(ValueError): + sleap_track(args=args) + def test_sleap_track_csv_input( min_centroid_model_path: 
str, @@ -1801,6 +1821,94 @@ def test_sleap_track_csv_input( assert Path(expected_output_file).exists() +def test_sleap_track_invalid_csv( + min_centroid_model_path: str, + min_centered_instance_model_path: str, + tmpdir, +): + + # Create a CSV file with missing 'data_path' column + csv_missing_column_path = tmpdir / "missing_column.csv" + df_missing_column = pandas.DataFrame( + {"some_other_column": ["video1.mp4", "video2.mp4", "video3.mp4"]} + ) + df_missing_column.to_csv(csv_missing_column_path, index=False) + + # Create an empty CSV file + csv_empty_path = tmpdir / "empty.csv" + open(csv_empty_path, "w").close() + + # Create sleap-track command for missing 'data_path' column + args_missing_column = ( + f"{csv_missing_column_path} --model {min_centroid_model_path} " + f"--tracking.tracker simple " + f"--model {min_centered_instance_model_path} --video.index 0 --frames 1-3 --cpu" + ).split() + + # Run inference and expect ValueError for missing 'data_path' column + with pytest.raises( + ValueError, match="Column 'data_path' does not exist in the CSV file." 
+ ): + sleap_track(args=args_missing_column) + + # Create sleap-track command for empty CSV file + args_empty = ( + f"{csv_empty_path} --model {min_centroid_model_path} " + f"--tracking.tracker simple " + f"--model {min_centered_instance_model_path} --video.index 0 --frames 1-3 --cpu" + ).split() + + # Run inference and expect ValueError for empty CSV file + with pytest.raises(ValueError, match=f"CSV file is empty: {csv_empty_path}"): + sleap_track(args=args_empty) + + +def test_sleap_track_text_file_input( + min_centroid_model_path: str, + min_centered_instance_model_path: str, + centered_pair_vid_path, + tmpdir, +): + + # Create temporary directory with the structured video files + slp_path = Path(tmpdir.mkdir("mp4_directory")) + + # Copy and paste the video into the temp dir multiple times + num_copies = 3 + file_paths = [] + for i in range(num_copies): + # Construct the destination path with a unique name + dest_path = slp_path / f"centered_pair_vid_copy_{i}.mp4" + shutil.copy(centered_pair_vid_path, dest_path) + file_paths.append(dest_path) + + # Create a text file with the file paths + txt_file_path = slp_path / "file_paths.txt" + with open(txt_file_path, mode="w") as txt_file: + for file_path in file_paths: + txt_file.write(f"{file_path}\n") + + slp_path_obj = Path(slp_path) + + # Create sleap-track command + args = ( + f"{txt_file_path} --model {min_centroid_model_path} " + f"--tracking.tracker simple " + f"--model {min_centered_instance_model_path} --video.index 0 --frames 1-3 --cpu" + ).split() + + slp_path_list = [file for file in slp_path_obj.iterdir() if file.is_file()] + + # Run inference + sleap_track(args=args) + + # Assert predictions file exists + for file_path in slp_path_list: + if file_path.suffix == ".mp4": + expected_output_file = f"{file_path}.predictions.slp" + assert Path(expected_output_file).exists() + + def test_flow_tracker(centered_pair_predictions: Labels, tmpdir): """Test flow tracker instances are pruned.""" labels: Labels = 
centered_pair_predictions From a69a65c6677fdd0c6b9c0d4fcb698f7a4b1d2d6c Mon Sep 17 00:00:00 2001 From: emdavis02 Date: Tue, 23 Jul 2024 13:53:09 -0700 Subject: [PATCH 4/8] Error fixing, black, deletion of (self-written) unused code --- sleap/nn/inference.py | 147 ++++++++++++++++++------------------- tests/nn/test_inference.py | 29 +++----- 2 files changed, 81 insertions(+), 95 deletions(-) diff --git a/sleap/nn/inference.py b/sleap/nn/inference.py index 8b3ae9eaa..28319f792 100644 --- a/sleap/nn/inference.py +++ b/sleap/nn/inference.py @@ -33,7 +33,7 @@ import atexit import subprocess import rich.progress -import pandas +import pandas as pd from rich.pretty import pprint from collections import deque import json @@ -5312,25 +5312,36 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: # If the file is a CSV file, check for data_paths and output_paths if data_path_obj.suffix.lower() == ".csv": try: + data_path_column = None # Read the CSV file - df = pandas.read_csv(data_path) + df = pd.read_csv(data_path) # collect data_paths from column - if "data_path" in df.columns: - raw_data_path_list = df["data_path"].tolist() - else: + for col_index in range(df.shape[1]): + path_str = df.iloc[0, col_index] + if Path(path_str).exists(): + data_path_column = df.columns[col_index] + break + if data_path_column is None: raise ValueError( - f"Column 'data_path' does not exist in the CSV file: {data_path}" + f"Column containing valid data_paths does not exist in the CSV file: {data_path}" ) + raw_data_path_list = df[data_path_column].tolist() # optional output_path column to specify multiple output_paths - if "output_path" in df.columns: - output_path_list = df["output_path"].tolist() + output_path_column_index = df.columns.get_loc(data_path_column) + 1 + if ( + output_path_column_index < df.shape[1] + and df.iloc[:, output_path_column_index].dtype == object + ): + # Ensure the next column exists + output_path_list = df.iloc[:, 
output_path_column_index].tolist() + else: + output_path_list = None - except pandas.errors.EmptyDataError as e: + except pd.errors.EmptyDataError as e: raise ValueError(f"CSV file is empty: {data_path}. Error: {e}") from e - # If the file is a text file, collect data_paths elif data_path_obj.suffix.lower() == ".txt": try: @@ -5341,7 +5352,7 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: f"Error reading text file: {data_path}. Error: {e}" ) from e else: - raw_data_path_list = [str(data_path_obj)] + raw_data_path_list = [data_path_obj.as_posix()] raw_data_path_list = [Path(p) for p in raw_data_path_list] @@ -5540,10 +5551,7 @@ def main(args: Optional[list] = None): output_path_obj = Path(output_path) # check if output_path is valid before running inference - if ( - Path(output_path).is_file() - and len(data_path_list) > 1 - ): + if Path(output_path).is_file() and len(data_path_list) > 1: raise ValueError( "output_path argument must be a directory if multiple video inputs are given" ) @@ -5574,7 +5582,7 @@ def main(args: Optional[list] = None): output_path = output_path_list[i] else: - output_path = f"{data_path.as_posix()}.predictions.slp" + output_path = data_path_obj.with_suffix(".predictions.slp") output_path_obj = Path(output_path) @@ -5582,7 +5590,8 @@ def main(args: Optional[list] = None): elif len(data_path_list) > 1: output_path_obj = Path(output_path) output_path = ( - output_path_obj / data_path_obj.with_suffix(".predictions.slp").name + output_path_obj + / (data_path_obj.with_suffix(".predictions.slp")).name ) output_path_obj = Path(output_path) # Create the containing directory if needed. @@ -5618,7 +5627,12 @@ def main(args: Optional[list] = None): labels_pr.provenance["args"] = vars(args) # Save results. 
- labels_pr.save(output_path) + try: + labels_pr.save(output_path) + except Exception as e: + print("WARNING: Provided output path invalid.") + fallback_path = data_path_obj.with_suffix(".predictions.slp") + labels_pr.save(fallback_path) print("Saved output:", output_path) if args.open_in_gui: @@ -5629,76 +5643,57 @@ def main(args: Optional[list] = None): # running tracking on existing prediction file elif getattr(args, "tracking.tracker") is not None: - for data_path, provider in zip(data_path_list, provider_list): - # Load predictions - data_path_obj = Path(data_path) - print("Loading predictions...") - labels_pr = sleap.load_file(data_path_obj.as_posix()) - frames = sorted(labels_pr.labeled_frames, key=lambda lf: lf.frame_idx) + provider = provider_list[0] + data_path = data_path_list[0] - print("Starting tracker...") - frames = run_tracker(frames=frames, tracker=tracker) - tracker.final_pass(frames) + # Load predictions + data_path = args.data_path + print("Loading predictions...") + labels_pr = sleap.load_file(data_path) + frames = sorted(labels_pr.labeled_frames, key=lambda lf: lf.frame_idx) - labels_pr = Labels(labeled_frames=frames) + print("Starting tracker...") + frames = run_tracker(frames=frames, tracker=tracker) + tracker.final_pass(frames) - if output_path is None: - output_path = f"{data_path}.{tracker.get_name()}.slp" - output_path_obj = Path(output_path) + labels_pr = Labels(labeled_frames=frames) - else: - output_path_obj = Path(output_path) - if ( - output_path_obj.exists() - and output_path_obj.is_file() - and len(data_path_list) > 1 - ): - raise ValueError( - "output_path argument must be a directory if multiple video inputs are given" - ) + if output_path is None: + output_path = f"{data_path}.{tracker.get_name()}.slp" - elif not output_path_obj.exists() and len(data_path_list) > 1: - output_path = output_path_obj / data_path_obj.with_suffix( - ".predictions.slp" - ) - output_path_obj = Path(output_path) - 
output_path_obj.parent.mkdir(exist_ok=True, parents=True) + if args.no_empty_frames: + # Clear empty frames if specified. + labels_pr.remove_empty_frames() - if args.no_empty_frames: - # Clear empty frames if specified. - labels_pr.remove_empty_frames() - - finish_timestamp = str(datetime.now()) - total_elapsed = time() - t0 - print("Finished inference at:", finish_timestamp) - print(f"Total runtime: {total_elapsed} secs") - print(f"Predicted frames: {len(labels_pr)}/{len(provider)}") + finish_timestamp = str(datetime.now()) + total_elapsed = time() - t0 + print("Finished inference at:", finish_timestamp) + print(f"Total runtime: {total_elapsed} secs") + print(f"Predicted frames: {len(labels_pr)}/{len(provider)}") - # Add provenance metadata to predictions. - labels_pr.provenance["sleap_version"] = sleap.__version__ - labels_pr.provenance["platform"] = platform.platform() - labels_pr.provenance["command"] = " ".join(sys.argv) - labels_pr.provenance["data_path"] = data_path_obj.as_posix() - labels_pr.provenance["output_path"] = output_path_obj.as_posix() - labels_pr.provenance["total_elapsed"] = total_elapsed - labels_pr.provenance["start_timestamp"] = start_timestamp - labels_pr.provenance["finish_timestamp"] = finish_timestamp + # Add provenance metadata to predictions. + labels_pr.provenance["sleap_version"] = sleap.__version__ + labels_pr.provenance["platform"] = platform.platform() + labels_pr.provenance["command"] = " ".join(sys.argv) + labels_pr.provenance["data_path"] = data_path + labels_pr.provenance["output_path"] = output_path + labels_pr.provenance["total_elapsed"] = total_elapsed + labels_pr.provenance["start_timestamp"] = start_timestamp + labels_pr.provenance["finish_timestamp"] = finish_timestamp - print("Provenance:") - pprint(labels_pr.provenance) - print() + print("Provenance:") + pprint(labels_pr.provenance) + print() - labels_pr.provenance["args"] = vars(args) + labels_pr.provenance["args"] = vars(args) - # Save results. 
- labels_pr.save(output_path) - print("Saved output:", output_path) + # Save results. + labels_pr.save(output_path) - if args.open_in_gui: - subprocess.call(["sleap-label", output_path]) + print("Saved output:", output_path) - # Reset output_path for next iteration - output_path = args.output + if args.open_in_gui: + subprocess.call(["sleap-label", output_path]) else: raise ValueError( diff --git a/tests/nn/test_inference.py b/tests/nn/test_inference.py index 646049256..cdd56da09 100644 --- a/tests/nn/test_inference.py +++ b/tests/nn/test_inference.py @@ -8,7 +8,7 @@ import numpy as np import pytest -import pandas +import pandas as pd import tensorflow as tf import tensorflow_hub as hub from numpy.testing import assert_array_equal, assert_allclose @@ -1511,7 +1511,7 @@ def test_sleap_track_single_input( sleap_track(args=args) # Assert predictions file exists - output_path = f"{slp_path}.predictions.slp" + output_path = Path(slp_path).with_suffix(".predictions.slp") assert Path(output_path).exists() # Create invalid sleap-track command @@ -1539,8 +1539,6 @@ def test_sleap_track_mult_input_slp( # Copy and paste the video into the temp dir multiple times num_copies = 3 for i in range(num_copies): - # Construct the destination path with a unique name for the video - # Construct the destination path with a unique name for the SLP file slp_dest_path = slp_path / f"old_slp_copy_{i}.slp" shutil.copy(slp_file, slp_dest_path) @@ -1563,8 +1561,8 @@ def test_sleap_track_mult_input_slp( } # Add other video formats if necessary for file_path in slp_path_list: - if file_path.suffix in expected_extensions: - expected_output_file = f"{file_path}.predictions.slp" + if file_path in expected_extensions: + expected_output_file = Path(file_path).with_suffix(".predictions.slp") assert Path(expected_output_file).exists() @@ -1607,7 +1605,7 @@ def test_sleap_track_mult_input_slp_mp4( # Assert predictions file exists for file_path in slp_path_list: if file_path.suffix == ".mp4": - 
expected_output_file = f"{file_path}.predictions.slp" + expected_output_file = Path(file_path).with_suffix(".predictions.slp") assert Path(expected_output_file).exists() @@ -1647,7 +1645,7 @@ def test_sleap_track_mult_input_mp4( # Assert predictions file exists for file_path in slp_path_list: if file_path.suffix == ".mp4": - expected_output_file = f"{file_path}.predictions.slp" + expected_output_file = Path(file_path).with_suffix(".predictions.slp") assert Path(expected_output_file).exists() @@ -1687,17 +1685,12 @@ def test_sleap_track_output_mult( sleap_track(args=args) slp_path = Path(slp_path) - print(f"Contents of the directory {slp_path_obj}:") - for file in slp_path_obj.iterdir(): - print(file) - # Check if there are any files in the directory for file_path in slp_path_list: if file_path.suffix == ".mp4": expected_output_file = output_path_obj / ( file_path.stem + ".predictions.slp" ) - print(f"expected output: {expected_output_file}") assert Path(expected_output_file).exists() @@ -1829,7 +1822,7 @@ def test_sleap_track_invalid_csv( # Create a CSV file with missing 'data_path' column csv_missing_column_path = tmpdir / "missing_column.csv" - df_missing_column = pandas.DataFrame( + df_missing_column = pd.DataFrame( {"some_other_column": ["video1.mp4", "video2.mp4", "video3.mp4"]} ) df_missing_column.to_csv(csv_missing_column_path, index=False) @@ -1846,9 +1839,7 @@ def test_sleap_track_invalid_csv( ).split() # Run inference and expect ValueError for missing 'data_path' column - with pytest.raises( - ValueError, match="Column 'data_path' does not exist in the CSV file." 
- ): + with pytest.raises(ValueError): sleap_track(args=args_missing_column) # Create sleap-track command for empty CSV file @@ -1859,7 +1850,7 @@ def test_sleap_track_invalid_csv( ).split() # Run inference and expect ValueError for empty CSV file - with pytest.raises(ValueError, match=f"CSV file is empty: {csv_empty_path}"): + with pytest.raises(ValueError): sleap_track(args=args_empty) @@ -1905,7 +1896,7 @@ def test_sleap_track_text_file_input( # Assert predictions file exists for file_path in slp_path_list: if file_path.suffix == ".mp4": - expected_output_file = f"{file_path}.predictions.slp" + expected_output_file = Path(file_path).with_suffix(".predictions.slp") assert Path(expected_output_file).exists() From 31eb3fbce857ca0f6de8b7952ac5f4ad9a74c657 Mon Sep 17 00:00:00 2001 From: emdavis02 Date: Thu, 25 Jul 2024 11:45:25 -0700 Subject: [PATCH 5/8] final edits --- sleap/nn/inference.py | 2 +- tests/nn/test_inference.py | 32 ++++++++++++++++++++------------ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/sleap/nn/inference.py b/sleap/nn/inference.py index 28319f792..bc72eeebf 100644 --- a/sleap/nn/inference.py +++ b/sleap/nn/inference.py @@ -5629,7 +5629,7 @@ def main(args: Optional[list] = None): # Save results. 
try: labels_pr.save(output_path) - except Exception as e: + except Exception: print("WARNING: Provided output path invalid.") fallback_path = data_path_obj.with_suffix(".predictions.slp") labels_pr.save(fallback_path) diff --git a/tests/nn/test_inference.py b/tests/nn/test_inference.py index cdd56da09..76aef728c 100644 --- a/tests/nn/test_inference.py +++ b/tests/nn/test_inference.py @@ -12,6 +12,7 @@ import tensorflow as tf import tensorflow_hub as hub from numpy.testing import assert_array_equal, assert_allclose +from sleap.io.video import available_video_exts import sleap from sleap.gui.learning import runners @@ -1556,12 +1557,10 @@ def test_sleap_track_mult_input_slp( sleap_track(args=args) # Assert predictions file exists - expected_extensions = { - ".mp4", - } # Add other video formats if necessary + expected_extensions = available_video_exts() for file_path in slp_path_list: - if file_path in expected_extensions: + if file_path.suffix in expected_extensions: expected_output_file = Path(file_path).with_suffix(".predictions.slp") assert Path(expected_output_file).exists() @@ -1602,9 +1601,10 @@ def test_sleap_track_mult_input_slp_mp4( # Run inference sleap_track(args=args) - # Assert predictions file exists + expected_extensions = available_video_exts() + for file_path in slp_path_list: - if file_path.suffix == ".mp4": + if file_path.suffix in expected_extensions: expected_output_file = Path(file_path).with_suffix(".predictions.slp") assert Path(expected_output_file).exists() @@ -1643,8 +1643,10 @@ def test_sleap_track_mult_input_mp4( sleap_track(args=args) # Assert predictions file exists + expected_extensions = available_video_exts() + for file_path in slp_path_list: - if file_path.suffix == ".mp4": + if file_path.suffix in expected_extensions: expected_output_file = Path(file_path).with_suffix(".predictions.slp") assert Path(expected_output_file).exists() @@ -1686,8 +1688,10 @@ def test_sleap_track_output_mult( slp_path = Path(slp_path) # Check if there 
are any files in the directory + expected_extensions = available_video_exts() + for file_path in slp_path_list: - if file_path.suffix == ".mp4": + if file_path.suffix in expected_extensions: expected_output_file = output_path_obj / ( file_path.stem + ".predictions.slp" ) @@ -1808,8 +1812,10 @@ def test_sleap_track_csv_input( sleap_track(args=args) # Assert predictions file exists + expected_extensions = available_video_exts() + for file_path in slp_path_list: - if file_path.suffix == ".mp4": + if file_path.suffix in expected_extensions: expected_output_file = file_path.with_suffix(".TESTpredictions.slp") assert Path(expected_output_file).exists() @@ -1839,7 +1845,7 @@ def test_sleap_track_invalid_csv( ).split() # Run inference and expect ValueError for missing 'data_path' column - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=f"Column containing valid data_paths does not exist in the CSV file: {csv_missing_column_path}"): sleap_track(args=args_missing_column) # Create sleap-track command for empty CSV file @@ -1850,7 +1856,7 @@ def test_sleap_track_invalid_csv( ).split() # Run inference and expect ValueError for empty CSV file - with pytest.raises(ValueError): + with pytest.raises(ValueError, match = f"CSV file is empty: {csv_empty_path}"): sleap_track(args=args_empty) @@ -1894,8 +1900,10 @@ def test_sleap_track_text_file_input( sleap_track(args=args) # Assert predictions file exists + expected_extensions = available_video_exts() + for file_path in slp_path_list: - if file_path.suffix == ".mp4": + if file_path.suffix in expected_extensions: expected_output_file = Path(file_path).with_suffix(".predictions.slp") assert Path(expected_output_file).exists() From 41441be0a86cabcad4852da02b8def3201ee1c41 Mon Sep 17 00:00:00 2001 From: emdavis02 Date: Thu, 25 Jul 2024 12:46:59 -0700 Subject: [PATCH 6/8] black --- tests/nn/test_inference.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/nn/test_inference.py 
b/tests/nn/test_inference.py index 76aef728c..e93d79c21 100644 --- a/tests/nn/test_inference.py +++ b/tests/nn/test_inference.py @@ -1845,7 +1845,9 @@ def test_sleap_track_invalid_csv( ).split() # Run inference and expect ValueError for missing 'data_path' column - with pytest.raises(ValueError, match=f"Column containing valid data_paths does not exist in the CSV file: {csv_missing_column_path}"): + with pytest.raises( + ValueError, + ): sleap_track(args=args_missing_column) # Create sleap-track command for empty CSV file @@ -1856,7 +1858,7 @@ def test_sleap_track_invalid_csv( ).split() # Run inference and expect ValueError for empty CSV file - with pytest.raises(ValueError, match = f"CSV file is empty: {csv_empty_path}"): + with pytest.raises(ValueError): sleap_track(args=args_empty) From 9860fe85fce1549b0e94f10f9336a565220e3c5b Mon Sep 17 00:00:00 2001 From: emdavis02 Date: Tue, 30 Jul 2024 10:08:40 -0700 Subject: [PATCH 7/8] documentation changes --- docs/guides/cli.md | 7 +++++-- sleap/nn/inference.py | 6 ++++-- tests/nn/test_inference.py | 13 ++++++------- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/guides/cli.md b/docs/guides/cli.md index ab62f3130..d7e044acf 100644 --- a/docs/guides/cli.md +++ b/docs/guides/cli.md @@ -138,7 +138,10 @@ usage: sleap-track [-h] [-m MODELS] [--frames FRAMES] [--only-labeled-frames] [- [data_path] positional arguments: - data_path Path to data to predict on. This can be a labels (.slp) file or any supported video format. + data_path Path to data to predict on. This can be one of the following: A .slp file containing labeled data; A folder containing multiple + video files in supported formats; An individual video file in a supported format; A CSV file with a column of file paths to + video files in supported formats. 
If more than one columns contains file paths, the first will be used for data path and the + next column will be tested as the output path; A text file with a path to a video file on each line optional arguments: -h, --help show this help message and exit @@ -153,7 +156,7 @@ optional arguments: Only run inference on unlabeled suggested frames when running on labels dataset. This is useful for generating predictions for initialization during labeling. -o OUTPUT, --output OUTPUT - The output filename to use for the predicted data. If not provided, defaults to '[data_path].predictions.slp'. + The output filename or directory path to use for the predicted data. If not provided, defaults to '[data_path].predictions.slp'. --no-empty-frames Clear any empty frames that did not have any detected instances before saving to output. --verbosity {none,rich,json} Verbosity of inference progress reporting. 'none' does not output anything during inference, 'rich' displays an updating diff --git a/sleap/nn/inference.py b/sleap/nn/inference.py index bc72eeebf..421378d56 100644 --- a/sleap/nn/inference.py +++ b/sleap/nn/inference.py @@ -5286,8 +5286,10 @@ def _make_provider_from_cli(args: argparse.Namespace) -> Tuple[Provider, str]: args: Parsed CLI namespace. Returns: - `(provider_list, data_path_list, output_path_list)` with the data `Provider`, path to the data - that was specified in the args, and list out output paths if a csv file was inputed. + `(provider_list, data_path_list, output_path_list)` where `provider_list` contains the data providers, + `data_path_list` contains the paths to the specified data, and the `output_path_list` contains the list + of output paths if a CSV file with a column of output paths was provided; otherwise, `output_path_list` + defaults to None """ # Figure out which input path to use. 
diff --git a/tests/nn/test_inference.py b/tests/nn/test_inference.py index e93d79c21..d13180591 100644 --- a/tests/nn/test_inference.py +++ b/tests/nn/test_inference.py @@ -10,7 +10,6 @@ import pytest import pandas as pd import tensorflow as tf -import tensorflow_hub as hub from numpy.testing import assert_array_equal, assert_allclose from sleap.io.video import available_video_exts @@ -1826,12 +1825,12 @@ def test_sleap_track_invalid_csv( tmpdir, ): - # Create a CSV file with missing 'data_path' column - csv_missing_column_path = tmpdir / "missing_column.csv" - df_missing_column = pd.DataFrame( - {"some_other_column": ["video1.mp4", "video2.mp4", "video3.mp4"]} + # Create a CSV file with nonexistent data files + csv_nonexistant_files_path = tmpdir / "nonexistant_files.csv" + df_nonexistant_files = pd.DataFrame( + {"data_path": ["video1.mp4", "video2.mp4", "video3.mp4"]} ) - df_missing_column.to_csv(csv_missing_column_path, index=False) + df_nonexistant_files.to_csv(csv_nonexistant_files_path, index=False) # Create an empty CSV file csv_empty_path = tmpdir / "empty.csv" @@ -1839,7 +1838,7 @@ def test_sleap_track_invalid_csv( # Create sleap-track command for missing 'data_path' column args_missing_column = ( - f"{csv_missing_column_path} --model {min_centroid_model_path} " + f"{csv_nonexistant_files_path} --model {min_centroid_model_path} " f"--tracking.tracker simple " f"--model {min_centered_instance_model_path} --video.index 0 --frames 1-3 --cpu" ).split() From 9e974fe575668690ca8b7b222e0a4d4b406d6433 Mon Sep 17 00:00:00 2001 From: emdavis02 Date: Wed, 31 Jul 2024 10:52:01 -0700 Subject: [PATCH 8/8] documentation changes --- docs/guides/cli.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/guides/cli.md b/docs/guides/cli.md index d7e044acf..03b806903 100644 --- a/docs/guides/cli.md +++ b/docs/guides/cli.md @@ -139,9 +139,9 @@ usage: sleap-track [-h] [-m MODELS] [--frames FRAMES] [--only-labeled-frames] [- positional arguments:
data_path Path to data to predict on. This can be one of the following: A .slp file containing labeled data; A folder containing multiple - video files in supported formats; An individual video file in a supported format; A CSV file with a column of file paths to - video files in supported formats. If more than one columns contains file paths, the first will be used for data path and the - next column will be tested as the output path; A text file with a path to a video file on each line + video files in supported formats; An individual video file in a supported format; A CSV file with a column of video file paths. + If more than one column is provided in the CSV file, the first will be used for the input data paths and the next column will be + used as the output paths; A text file with a path to a video file on each line optional arguments: -h, --help show this help message and exit