From 463aeb27a41895e812f23a56123ae562747a3d20 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 10 Jun 2024 15:48:58 -0400 Subject: [PATCH 01/39] work for #37 --- looper/conductor.py | 93 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/looper/conductor.py b/looper/conductor.py index ffbb1b54..21b4d015 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -4,6 +4,9 @@ import logging import os import subprocess +import signal +import psutil +import sys import time import yaml from math import ceil @@ -189,6 +192,7 @@ def __init__( the project level, rather that on the sample level) """ super(SubmissionConductor, self).__init__() + self.collate = collate self.section_key = PROJECT_PL_KEY if self.collate else SAMPLE_PL_KEY self.pl_iface = pipeline_interface @@ -210,6 +214,7 @@ def __init__( self._curr_size = 0 self._failed_sample_names = [] self._curr_skip_pool = [] + self.process_id = None # this is used for a submitted subprocess if self.extra_pipe_args: _LOGGER.debug( @@ -392,6 +397,10 @@ def submit(self, force=False): not for dry run) """ submitted = False + + # Override signal handler so that Ctrl+C can be used to gracefully terminate child process + signal.signal(signal.SIGINT, self._signal_int_handler) + if not self._pool: _LOGGER.debug("No submission (no pooled samples): %s", self.pl_name) # submitted = False @@ -421,7 +430,9 @@ def submit(self, force=False): # Capture submission command return value so that we can # intercept and report basic submission failures; #167 try: - subprocess.check_call(submission_command, shell=True) + process = subprocess.Popen(submission_command, shell=True) + self.process_id = process.pid + return_code = process.wait() except subprocess.CalledProcessError: fails = ( "" if self.collate else [s.sample_name for s in self._samples] @@ -489,6 +500,86 @@ def _sample_lump_name(self, pool): # name concordant with 1-based, not 0-based indexing. return "lump{}".format(self._num_total_job_submissions + 1) + def _signal_int_handler(self, signal, frame): + """ + For catching interrupt (Ctrl +C) signals. Fails gracefully. + """ + signal_type = "SIGINT" + self._generic_signal_handler(signal_type) + + def _generic_signal_handler(self, signal_type): + """ + Function for handling both SIGTERM and SIGINT + """ + message = "Received " + signal_type + ". Failing gracefully..." + _LOGGER.warning(msg=message) + + self._terminate_current_subprocess() + + sys.exit(1) + + def _terminate_current_subprocess(self): + + def pskill(proc_pid, sig=signal.SIGINT): + parent_process = psutil.Process(proc_pid) + for child_proc in parent_process.children(recursive=True): + child_proc.send_signal(sig) + parent_process.send_signal(sig) + + if self.process_id is None: + return + + # Gently wait for the subprocess before attempting to kill it + sys.stdout.flush() + still_running = self._attend_process(psutil.Process(self.process_id), 0) + sleeptime = 0.25 + time_waiting = 0 + + while still_running and time_waiting < 3: + try: + if time_waiting > 2: + pskill(self.process_id, signal.SIGKILL) + elif time_waiting > 1: + pskill(self.process_id, signal.SIGTERM) + else: + pskill(self.process_id, signal.SIGINT) + + except OSError: + # This would happen if the child process ended between the check + # and the next kill step + still_running = False + time_waiting = time_waiting + sleeptime + + # Now see if it's still running + time_waiting = time_waiting + sleeptime + if not self._attend_process(psutil.Process(self.process_id), sleeptime): + still_running = False + + if still_running: + _LOGGER.warning(f"Unable to halt child process: {self.process_id}") + else: + if time_waiting > 0: + note = f"terminated after {time_waiting} sec" + else: + note = "was already terminated" + _LOGGER.warning(msg=f"Child process {self.process_id} {note}.") + + def _attend_process(self, proc, sleeptime): + """ + Waits on a process for a given time to see if it finishes, returns True + if it's still running after the given time or False as soon as it + returns. + + :param psutil.Process proc: Process object opened by psutil.Popen() + :param float sleeptime: Time to wait + :return bool: True if process is still running; otherwise false + """ + try: + proc.wait(timeout=int(sleeptime)) + except psutil.TimeoutExpired: + return True + return False + def _jobname(self, pool): """Create the name for a job submission.""" return "{}_{}".format(self.pl_iface.pipeline_name, self._sample_lump_name(pool)) From ae5d14d442e615139968c8828991049d70ce70cd Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 10 Jun 2024 15:54:29 -0400 Subject: [PATCH 02/39] add psutil to requirements --- requirements/requirements-all.txt | 3 ++- requirements/requirements-test.txt | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index a78b632d..bd28baa8 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -11,4 +11,5 @@ pyyaml>=3.12 rich>=9.10.0 ubiquerg>=0.5.2 yacman==0.9.3 -pydantic2-argparse>=0.9.2 \ No newline at end of file +pydantic2-argparse>=0.9.2 +psutil \ No newline at end of file diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 87d10086..f5579eba 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -4,4 +4,5 @@ pytest pytest-cov pytest-remotedata veracitools -GitPython \ No newline at end of file +GitPython +psutil \ No newline at end of file From a675ab1c15bf18c351e16916785a0d5a080fb127 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 10 Jun 2024 16:10:57 -0400 Subject: [PATCH 03/39] handle subprocess errors using PIPE instead of subprocess.CalledProcessError exception --- looper/conductor.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index 21b4d015..48be8555 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -12,7 +12,7 @@ from math import ceil from copy import copy, deepcopy from json import loads -from subprocess import check_output +from subprocess import check_output, PIPE from typing import * from eido import read_schema, get_input_files_size @@ -429,11 +429,11 @@ def submit(self, force=False): submission_command = "{} {}".format(sub_cmd, script) # Capture submission command return value so that we can # intercept and report basic submission failures; #167 - try: - process = subprocess.Popen(submission_command, shell=True) - self.process_id = process.pid - return_code = process.wait() - except subprocess.CalledProcessError: + + process = subprocess.Popen(submission_command, stderr=PIPE, shell=True) + self.process_id = process.pid + output, errors = process.communicate() + if errors: fails = ( "" if self.collate else [s.sample_name for s in self._samples] ) From cbf9b24c9f142d2aa34e5cee296106f5908958a5 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:28:07 -0400 Subject: [PATCH 04/39] update advanced and pipestat examples, fix corresponding tests, add output_schema to pipestat namespace --- looper/conductor.py | 5 ++-- .../advanced/pipeline/col_pipeline1.py | 0 .../advanced/pipeline/col_pipeline2.py | 0 .../advanced/pipeline/other_pipeline2.py | 0 .../advanced/pipeline/pipeline1.py | 0 .../pipeline/pipeline_interface1_project.yaml | 8 +++---- .../pipeline/pipeline_interface1_sample.yaml | 8 +++---- .../pipeline/pipeline_interface2_project.yaml | 8 +++---- .../pipeline/pipeline_interface2_sample.yaml | 7 ++---- .../pipestat_pipeline_interface1_sample.yaml | 8 +++---- .../pipestat_pipeline_interface2_sample.yaml | 8 +++---- .../pipestat/pipeline_pipestat/count_lines.py | 7 +++++- .../pipeline_pipestat/pipeline_interface.yaml | 2 +- tests/smoketests/test_other.py | 24 +++++++++++++++++++ tests/test_comprehensive.py | 1 + 15 files changed, 52 insertions(+), 34 deletions(-) create mode 100644 tests/data/hello_looper-dev/advanced/pipeline/col_pipeline1.py create mode 100644 tests/data/hello_looper-dev/advanced/pipeline/col_pipeline2.py create mode 100644 tests/data/hello_looper-dev/advanced/pipeline/other_pipeline2.py create mode 100644 tests/data/hello_looper-dev/advanced/pipeline/pipeline1.py diff --git a/looper/conductor.py b/looper/conductor.py index 48be8555..bcdd57df 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -429,11 +429,11 @@ def submit(self, force=False): submission_command = "{} {}".format(sub_cmd, script) # Capture submission command return value so that we can # intercept and report basic submission failures; #167 - process = subprocess.Popen(submission_command, stderr=PIPE, shell=True) self.process_id = process.pid output, errors = process.communicate() - if errors: + _LOGGER.debug(msg=errors) + if process.returncode != 0: fails = ( "" if self.collate else [s.sample_name for s in self._samples] ) @@ -654,6 +654,7 @@ def _set_pipestat_namespace( "results_file": psm.file, "record_identifier": psm.record_identifier, "config_file": psm.config_path, + "output_schema": psm.cfg["_schema_path"], } filtered_namespace = {k: v for k, v in full_namespace.items() if v} return YAMLConfigManager(filtered_namespace) diff --git a/tests/data/hello_looper-dev/advanced/pipeline/col_pipeline1.py b/tests/data/hello_looper-dev/advanced/pipeline/col_pipeline1.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/hello_looper-dev/advanced/pipeline/col_pipeline2.py b/tests/data/hello_looper-dev/advanced/pipeline/col_pipeline2.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/hello_looper-dev/advanced/pipeline/other_pipeline2.py b/tests/data/hello_looper-dev/advanced/pipeline/other_pipeline2.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipeline1.py b/tests/data/hello_looper-dev/advanced/pipeline/pipeline1.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_project.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_project.yaml index cddc14b7..2a23d321 100644 --- a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_project.yaml +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_project.yaml @@ -2,10 +2,8 @@ pipeline_name: PIPELINE1 pipeline_type: project output_schema: output_schema.yaml var_templates: - path: "{looper.piface_dir}/pipelines/col_pipeline1.py" + path: "{looper.piface_dir}/col_pipeline1.py" command_template: > - {pipeline.var_templates.path} --project-name {project.name} + python3 {pipeline.var_templates.path} --project-name {project.name} + -bioconductor: - readFunName: readData - readFunPath: readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_sample.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_sample.yaml index 43638d92..8e79b7ae 100644 --- a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_sample.yaml +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_sample.yaml @@ -3,13 +3,11 @@ pipeline_type: sample input_schema: https://schema.databio.org/pep/2.0.0.yaml output_schema: output_schema.yaml var_templates: - path: "{looper.piface_dir}/pipelines/pipeline1.py" + path: "{looper.piface_dir}/pipeline1.py" pre_submit: python_functions: - looper.write_sample_yaml command_template: > - {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + -bioconductor: - readFunName: readData - readFunPath: readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_project.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_project.yaml index 7c4a4223..824b7e09 100644 --- a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_project.yaml +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_project.yaml @@ -2,12 +2,10 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: project output_schema: output_schema.yaml var_templates: - path: "{looper.piface_dir}/pipelines/col_pipeline2.py" + path: "{looper.piface_dir}/col_pipeline2.py" command_template: > - {pipeline.var_templates.path} --project-name {project.name} + python3 {pipeline.var_templates.path} --project-name {project.name} compute: size_dependent_variables: resources-project.tsv -bioconductor: - readFunName: readData - readFunPath: readData.R + diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_sample.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_sample.yaml index 987f7873..589aef6d 100644 --- a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_sample.yaml +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_sample.yaml @@ -2,15 +2,12 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: sample output_schema: output_schema.yaml var_templates: - path: "{looper.piface_dir}/pipelines/other_pipeline2.py" + path: "{looper.piface_dir}/other_pipeline2.py" pre_submit: python_functions: - looper.write_sample_yaml command_template: > - {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} compute: size_dependent_variables: resources-sample.tsv -bioconductor: - readFunName: readData - readFunPath: readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface1_sample.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface1_sample.yaml index ff40c411..e687ea0d 100644 --- a/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface1_sample.yaml +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface1_sample.yaml @@ -3,13 +3,11 @@ pipeline_type: sample input_schema: https://schema.databio.org/pep/2.0.0.yaml output_schema: pipestat_output_schema.yaml var_templates: - path: "{looper.piface_dir}/pipelines/pipeline1.py" + path: "{looper.piface_dir}/pipeline1.py" pre_submit: python_functions: - looper.write_sample_yaml command_template: > - {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + -bioconductor: - readFunName: readData - readFunPath: readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface2_sample.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface2_sample.yaml index 79dcf50f..bac3ea3d 100644 --- a/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface2_sample.yaml +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface2_sample.yaml @@ -3,15 +3,13 @@ pipeline_type: sample input_schema: https://schema.databio.org/pep/2.0.0.yaml output_schema: pipestat_output_schema.yaml var_templates: - path: "{looper.piface_dir}/pipelines/other_pipeline2.py" + path: "{looper.piface_dir}/other_pipeline2.py" pre_submit: python_functions: - looper.write_sample_yaml command_template: > - {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} compute: size_dependent_variables: resources-sample.tsv -bioconductor: - readFunName: readData - readFunPath: readData.R + diff --git a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines.py b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines.py index 97e866ee..6f6a4ab8 100755 --- a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines.py +++ b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines.py @@ -1,3 +1,5 @@ +import os.path + import pipestat import sys @@ -8,14 +10,17 @@ ] # this is the sample we wish to process by reading the number of lines sample_name = sys.argv[2] results_file = sys.argv[3] +schema_path = sys.argv[4] # Create pipestat manager and then report values psm = pipestat.PipestatManager( - schema_path="pipeline_pipestat/pipestat_output_schema.yaml", + schema_path=schema_path, results_file_path=results_file, record_identifier=sample_name, ) + +text_file = os.path.abspath(text_file) # Read text file and count lines with open(text_file, "r") as f: result = {"number_of_lines": len(f.readlines())} diff --git a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml index 1d26ac43..ec6cf255 100644 --- a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml +++ b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml @@ -2,4 +2,4 @@ pipeline_name: example_pipestat_pipeline pipeline_type: sample output_schema: pipestat_output_schema.yaml command_template: > - python {looper.piface_dir}/count_lines.py {sample.file} {sample.sample_name} {pipestat.results_file} \ No newline at end of file + python {looper.piface_dir}/count_lines.py {sample.file} {sample.sample_name} {pipestat.results_file} {pipestat.output_schema} \ No newline at end of file diff --git a/tests/smoketests/test_other.py b/tests/smoketests/test_other.py index 2527f4f2..b90e9b61 100644 --- a/tests/smoketests/test_other.py +++ b/tests/smoketests/test_other.py @@ -95,6 +95,30 @@ def test_pipestat_rerun(self, prep_temp_pep_pipestat, pipeline_name, flags): """Verify that rerun works with either failed or waiting flags""" tp = prep_temp_pep_pipestat _make_flags_pipestat(tp, flags, pipeline_name) + path_to_looper_config = prep_temp_pep_pipestat + pipestat_dir = os.path.dirname(path_to_looper_config) + + # open up the project config and replace the derived attributes with the path to the data. In a way, this simulates using the environment variables. + pipestat_project_file = get_project_config_path(path_to_looper_config) + + pipestat_pipeline_interface_file = os.path.join( + pipestat_dir, "pipeline_pipestat/pipeline_interface.yaml" + ) + + with open(pipestat_project_file, "r") as f: + pipestat_project_data = safe_load(f) + + pipestat_project_data["sample_modifiers"]["derive"]["sources"]["source1"] = ( + os.path.join(pipestat_dir, "data/{sample_name}.txt") + ) + + with open(pipestat_pipeline_interface_file, "r") as f: + pipestat_piface_data = safe_load(f) + + pipeline_name = pipestat_piface_data["pipeline_name"] + + with open(pipestat_project_file, "w") as f: + dump(pipestat_project_data, f) x = ["rerun", "--looper-config", tp] try: diff --git a/tests/test_comprehensive.py b/tests/test_comprehensive.py index cce74ca5..9b857f8f 100644 --- a/tests/test_comprehensive.py +++ b/tests/test_comprehensive.py @@ -167,6 +167,7 @@ def test_comprehensive_looper_pipestat(prep_temp_pep_pipestat): tsv_list = [os.path.join(sd, f) for f in os.listdir(sd) if f.endswith(".tsv")] assert len(tsv_list) == 0 with pytest.raises(RecordNotFoundError): + psm = PipestatManager(config_file=path_to_pipestat_config) retrieved_result = psm.retrieve_one(record_identifier="frog_2") From 7df88bfdb4ea7cae13a2fbaa69786796b767c60c Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:39:35 -0400 Subject: [PATCH 05/39] add more doc strings --- looper/conductor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/looper/conductor.py b/looper/conductor.py index bcdd57df..ec074d52 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -519,6 +519,7 @@ def _generic_signal_handler(self, signal_type): sys.exit(1) def _terminate_current_subprocess(self): + """This terminates the current sub process associated with self.process_id""" def pskill(proc_pid, sig=signal.SIGINT): parent_process = psutil.Process(proc_pid) From 296b0cf096742b7decd46ce8695a7a3b1f51987b Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:41:21 -0400 Subject: [PATCH 06/39] edit comments --- looper/conductor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/conductor.py b/looper/conductor.py index ec074d52..1b08961d 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -214,7 +214,7 @@ def __init__( self._curr_size = 0 self._failed_sample_names = [] self._curr_skip_pool = [] - self.process_id = None # this is used for a submitted subprocess + self.process_id = None # this is used for currently submitted subprocess if self.extra_pipe_args: _LOGGER.debug( From b9b53cc20804a582eaa7261299c558b399008133 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:58:14 -0400 Subject: [PATCH 07/39] align command templates to be the same --- .../pipestat/pipeline_pipestat/pipeline_interface.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml index ec6cf255..e5a14402 100644 --- a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml +++ b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml @@ -2,4 +2,4 @@ pipeline_name: example_pipestat_pipeline pipeline_type: sample output_schema: pipestat_output_schema.yaml command_template: > - python {looper.piface_dir}/count_lines.py {sample.file} {sample.sample_name} {pipestat.results_file} {pipestat.output_schema} \ No newline at end of file + python3 {looper.piface_dir}/count_lines.py {sample.file} {sample.sample_name} {pipestat.results_file} {pipestat.output_schema} \ No newline at end of file From 49fa338ae48a2bd9173d1bfcd4f961b4d2b13b71 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 11 Jun 2024 16:30:35 -0400 Subject: [PATCH 08/39] add logging output/errors --- looper/conductor.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index 1b08961d..9fea9865 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -429,10 +429,16 @@ def submit(self, force=False): submission_command = "{} {}".format(sub_cmd, script) # Capture submission command return value so that we can # intercept and report basic submission failures; #167 - process = subprocess.Popen(submission_command, stderr=PIPE, shell=True) + process = subprocess.Popen( + submission_command, stdout=PIPE, stderr=PIPE, shell=True + ) self.process_id = process.pid output, errors = process.communicate() - _LOGGER.debug(msg=errors) + if output: + # TODO this is ugly and needs to be formatted better before being presented to user. + _LOGGER.info(msg=output) + if errors: + _LOGGER.info(msg=errors) if process.returncode != 0: fails = ( "" if self.collate else [s.sample_name for s in self._samples] From 8f21fd566b40e4716b5649ab8242c6c88c8873e7 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 12 Jun 2024 08:55:00 -0400 Subject: [PATCH 09/39] revert sending output to PIPE, clean up imports --- looper/conductor.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index 9fea9865..4e23d0b3 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -10,9 +10,8 @@ import time import yaml from math import ceil -from copy import copy, deepcopy from json import loads -from subprocess import check_output, PIPE +from subprocess import check_output from typing import * from eido import read_schema, get_input_files_size @@ -22,12 +21,12 @@ from peppy.const import CONFIG_KEY, SAMPLE_NAME_ATTR, SAMPLE_YAML_EXT from peppy.exceptions import RemoteYAMLError from pipestat import PipestatError -from ubiquerg import expandpath, is_command_callable +from ubiquerg import expandpath from yaml import dump from yacman import FutureYAMLConfigManager as YAMLConfigManager from .const import * -from .exceptions import JobSubmissionException, SampleFailedException +from .exceptions import JobSubmissionException from .processed_project import populate_sample_paths from .utils import fetch_sample_flags, jinja_render_template_strictly from .const import PipelineLevel @@ -429,16 +428,9 @@ def submit(self, force=False): submission_command = "{} {}".format(sub_cmd, script) # Capture submission command return value so that we can # intercept and report basic submission failures; #167 - process = subprocess.Popen( - submission_command, stdout=PIPE, stderr=PIPE, shell=True - ) + process = subprocess.Popen(submission_command, shell=True) self.process_id = process.pid - output, errors = process.communicate() - if output: - # TODO this is ugly and needs to be formatted better before being presented to user. - _LOGGER.info(msg=output) - if errors: - _LOGGER.info(msg=errors) + process.wait() if process.returncode != 0: fails = ( "" if self.collate else [s.sample_name for s in self._samples] From 0c5f85edeea21eb95c4349a367763824af04627d Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 12 Jun 2024 09:26:51 -0400 Subject: [PATCH 10/39] remove build_submission_bundles as it appears unused --- looper/project.py | 59 ----------------------------------------------- 1 file changed, 59 deletions(-) diff --git a/looper/project.py b/looper/project.py index 16684ba7..4bbee02d 100644 --- a/looper/project.py +++ b/looper/project.py @@ -374,65 +374,6 @@ def get_sample_piface(self, sample_name): except KeyError: return None - def build_submission_bundles(self, protocol, priority=True): - """ - Create pipelines to submit for each sample of a particular protocol. - - With the argument (flag) to the priority parameter, there's control - over whether to submit pipeline(s) from only one of the project's - known pipeline locations with a match for the protocol, or whether to - submit pipelines created from all locations with a match for the - protocol. - - :param str protocol: name of the protocol/library for which to - create pipeline(s) - :param bool priority: to only submit pipeline(s) from the first of the - pipelines location(s) (indicated in the project config file) that - has a match for the given protocol; optional, default True - :return Iterable[(PipelineInterface, type, str, str)]: - :raises AssertionError: if there's a failure in the attempt to - partition an interface's pipeline scripts into disjoint subsets of - those already mapped and those not yet mapped - """ - - if not priority: - raise NotImplementedError( - "Currently, only prioritized protocol mapping is supported " - "(i.e., pipeline interfaces collection is a prioritized list, " - "so only the first interface with a protocol match is used.)" - ) - - # Pull out the collection of interfaces (potentially one from each of - # the locations indicated in the project configuration file) as a - # sort of pool of information about possible ways in which to submit - # pipeline(s) for sample(s) of the indicated protocol. - pifaces = self.interfaces.get_pipeline_interface(protocol) - if not pifaces: - raise PipelineInterfaceConfigError( - "No interfaces for protocol: {}".format(protocol) - ) - - # coonvert to a list, in the future we might allow to match multiple - pifaces = pifaces if isinstance(pifaces, str) else [pifaces] - - job_submission_bundles = [] - new_jobs = [] - - _LOGGER.debug("Building pipelines matched by protocol: {}".format(protocol)) - - for pipe_iface in pifaces: - # Determine how to reference the pipeline and where it is. - path = pipe_iface["path"] - if not (os.path.exists(path) or is_command_callable(path)): - _LOGGER.warning("Missing pipeline script: {}".format(path)) - continue - - # Add this bundle to the collection of ones relevant for the - # current PipelineInterface. - new_jobs.append(pipe_iface) - job_submission_bundles.append(new_jobs) - return list(itertools.chain(*job_submission_bundles)) - @staticmethod def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY): """ From 4c952d348d0cb1824474eaaee7c4e5ae84186690 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 14 Jun 2024 12:28:41 -0400 Subject: [PATCH 11/39] pull sample_modifiers and/or project modifiers from looper config #270 --- looper/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/looper/utils.py b/looper/utils.py index fc667172..fce5fa88 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -543,6 +543,12 @@ def read_looper_config_file(looper_config_path: str) -> dict: if PIPESTAT_KEY in dp_data: return_dict[PIPESTAT_KEY] = dp_data[PIPESTAT_KEY] + if SAMPLE_MODS_KEY in dp_data: + return_dict[SAMPLE_MODS_KEY] = dp_data[SAMPLE_MODS_KEY] + + if PROJ_MODS_KEY in dp_data: + return_dict[PROJ_MODS_KEY] = dp_data[PROJ_MODS_KEY] + if PIPELINE_INTERFACES_KEY in dp_data: dp_data.setdefault(PIPELINE_INTERFACES_KEY, {}) return_dict[SAMPLE_PL_ARG] = dp_data.get(PIPELINE_INTERFACES_KEY).get("sample") From c8a734e190899bc9c460600cf030d0ffb102e169 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 14 Jun 2024 13:20:25 -0400 Subject: [PATCH 12/39] add sample_mods and project_mods to arguments and substitute them if necessary --- looper/cli_pydantic.py | 2 ++ looper/command_models/arguments.py | 10 ++++++++++ looper/command_models/commands.py | 2 ++ looper/const.py | 3 +++ looper/project.py | 10 ++++++++++ 5 files changed, 27 insertions(+) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 035a8043..764c311c 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -151,6 +151,8 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): looper_config_dict = read_looper_dotfile() _LOGGER.info(f"Using looper config ({looper_cfg_path}).") + # subcommand_args.sample_modifiers = {} + # subcommand_args.project_modifiers = {} for looper_config_key, looper_config_item in looper_config_dict.items(): setattr(subcommand_args, looper_config_key, looper_config_item) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 8c484d33..cd89845d 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -281,3 +281,13 @@ class ArgumentEnum(enum.Enum): default=(bool, False), description="Is this command executed for project-level?", ) + SAMPLE_MODIFIERS = Argument( + name="sample_modifiers", + default=(dict, None), + description="sample modifiers. Note: add these via looper config file. Not the CLI.", + ) + PROJECT_MODIFIERS = Argument( + name="project_modifiers", + default=(dict, None), + description="project modifiers. Note: add these via looper config file. Not the CLI", + ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 233cfd0b..29080201 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -61,6 +61,8 @@ def create_model(self) -> Type[pydantic.BaseModel]: ArgumentEnum.SETTINGS.value, ArgumentEnum.AMEND.value, ArgumentEnum.PROJECT_LEVEL.value, + ArgumentEnum.SAMPLE_MODIFIERS.value, + ArgumentEnum.PROJECT_MODIFIERS.value, ] RunParser = Command( diff --git a/looper/const.py b/looper/const.py index ca70851d..d6449f2b 100644 --- a/looper/const.py +++ b/looper/const.py @@ -2,6 +2,7 @@ import os from enum import Enum +from peppy.const import SAMPLE_MODS_KEY, PROJ_MODS_KEY __author__ = "Databio lab" __email__ = "nathan@code.databio.org" @@ -237,6 +238,8 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): PIPESTAT_KEY, DEFAULT_PIPESTAT_CONFIG_ATTR, PEP_CONFIG_KEY, + SAMPLE_MODS_KEY, + PROJ_MODS_KEY, ] # resource package TSV-related consts diff --git a/looper/project.py b/looper/project.py index 4bbee02d..e070233e 100644 --- a/looper/project.py +++ b/looper/project.py @@ -136,6 +136,16 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): self.name = None # add sample pipeline interface to the project + if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): + _LOGGER.warning( + "Sample modifiers were provided in Looper config. Overwriting those in PEP." + ) + self.config.setdefault("sample_modifiers", {}) + self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) + elif kwargs.get(SAMPLE_MODS_KEY): + self.config.setdefault("sample_modifiers", {}) + self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) + if kwargs.get(SAMPLE_PL_ARG): self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG)) From 9c36b7b78838881c626ef2083dbd2d22572faa82 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:54:13 -0400 Subject: [PATCH 13/39] add hierarchical var_templates #334 --- looper/conductor.py | 8 +++++++- looper/pipeline_interface.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index 4e23d0b3..5f71c9be 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -718,7 +718,13 @@ def write_script(self, pool, size): namespaces["pipeline"]["var_templates"] = pl_iface[VAR_TEMPL_KEY] or {} for k, v in namespaces["pipeline"]["var_templates"].items(): - namespaces["pipeline"]["var_templates"][k] = expandpath(v) + if isinstance(v, dict): + for key, value in v.items(): + namespaces["pipeline"]["var_templates"][k][key] = ( + jinja_render_template_strictly(value, namespaces) + ) + else: + namespaces["pipeline"]["var_templates"][k] = expandpath(v) # pre_submit hook namespace updates namespaces = _exec_pre_submit(pl_iface, namespaces) diff --git a/looper/pipeline_interface.py b/looper/pipeline_interface.py index abe9a43d..8674f3bb 100644 --- a/looper/pipeline_interface.py +++ b/looper/pipeline_interface.py @@ -90,7 +90,13 @@ def render_var_templates(self, namespaces): if curr_data: var_templates.update(curr_data) for k, v in var_templates.items(): - var_templates[k] = jinja_render_template_strictly(v, namespaces) + if isinstance(v, dict): + for key, value in v.items(): + var_templates[k][key] = jinja_render_template_strictly( + value, namespaces + ) + else: + var_templates[k] = jinja_render_template_strictly(v, namespaces) return var_templates def get_pipeline_schemas(self, schema_key=INPUT_SCHEMA_KEY): From a1640f2d331248fd0ba520b02cb8e4e27b25eaef Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 19 Jun 2024 09:04:03 -0400 Subject: [PATCH 14/39] fix tests #334 --- looper/conductor.py | 38 +++++++++++++++++++++++++++++++++++--- looper/plugins.py | 10 ++++++---- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index 5f71c9be..194d6292 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -35,13 +35,21 @@ _LOGGER = logging.getLogger(__name__) -def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename=None): +def _get_yaml_path( + namespaces, + template_key, + template_subkey=None, + default_name_appendix="", + filename=None, +): """ Get a path to a YAML file for the sample. :param dict[dict]] namespaces: namespaces mapping :param str template_key: the name of the key in 'var_templates' piface section that points to a template to render to get the + user-provided target YAML path OR it is a 1st level key. + :param str template_subkey: the name of the key nested under template_key that that points to a template to render to get the user-provided target YAML path :param str default_name_appendix: a string to append to insert in target YAML file name: '{sample.sample_name}<>.yaml' @@ -52,11 +60,33 @@ def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename= if ( VAR_TEMPL_KEY in namespaces["pipeline"] and template_key in namespaces["pipeline"][VAR_TEMPL_KEY] + and template_subkey in namespaces["pipeline"][VAR_TEMPL_KEY][template_key] ): _LOGGER.debug(f"Sample namespace: {namespaces['sample']}") x = jinja_render_template_strictly("{sample.sample_name}", namespaces) _LOGGER.debug(f"x: {x}") - cpy = namespaces["pipeline"][VAR_TEMPL_KEY][template_key] + cpy = namespaces["pipeline"][VAR_TEMPL_KEY][template_key][template_subkey] + _LOGGER.debug(f"cpy: {cpy}") + path = expandpath(jinja_render_template_strictly(cpy, namespaces)) + _LOGGER.debug(f"path: {path}") + + if not path.endswith(SAMPLE_YAML_EXT) and not filename: + raise ValueError( + f"{template_key} is not a valid target YAML file path. " + f"It needs to end with: {' or '.join(SAMPLE_YAML_EXT)}" + ) + final_path = os.path.join(path, filename) if filename else path + if not os.path.exists(os.path.dirname(final_path)): + os.makedirs(os.path.dirname(final_path), exist_ok=True) + + elif ( + VAR_TEMPL_KEY in namespaces["pipeline"] + and template_subkey in namespaces["pipeline"][VAR_TEMPL_KEY] + ): + _LOGGER.debug(f"Sample namespace: {namespaces['sample']}") + x = jinja_render_template_strictly("{sample.sample_name}", namespaces) + _LOGGER.debug(f"x: {x}") + cpy = namespaces["pipeline"][VAR_TEMPL_KEY][template_subkey] _LOGGER.debug(f"cpy: {cpy}") path = expandpath(jinja_render_template_strictly(cpy, namespaces)) _LOGGER.debug(f"path: {path}") @@ -116,7 +146,9 @@ def write_submission_yaml(namespaces): :param dict namespaces: variable namespaces dict :return dict: sample namespace dict """ - path = _get_yaml_path(namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_submission") + path = _get_yaml_path( + namespaces, "write_submission_yaml", SAMPLE_CWL_YAML_PATH_KEY, "_submission" + ) my_namespaces = {} for namespace, values in namespaces.items(): my_namespaces.update({str(namespace): dict(values)}) diff --git a/looper/plugins.py b/looper/plugins.py index dc34283e..a4fee0d3 100644 --- a/looper/plugins.py +++ b/looper/plugins.py @@ -20,7 +20,9 @@ def write_sample_yaml_prj(namespaces): """ sample = namespaces["sample"] sample.to_yaml( - _get_yaml_path(namespaces, SAMPLE_YAML_PRJ_PATH_KEY, "_sample_prj"), + _get_yaml_path( + namespaces, "write_sample_yaml_prj", SAMPLE_YAML_PRJ_PATH_KEY, "_sample_prj" + ), add_prj_ref=True, ) return {"sample": sample} @@ -54,7 +56,7 @@ def load_template(pipeline): tpl = load_template(namespaces["pipeline"]) content = tpl.render(namespaces) - pth = _get_yaml_path(namespaces, "custom_template_output", "config") + pth = _get_yaml_path(namespaces, "custom_template_output", None, "config") namespaces["sample"]["custom_template_output"] = pth with open(pth, "wb") as fh: # print(content) @@ -95,7 +97,7 @@ def _get_schema_source( # File and Directory object types directly. sample = namespaces["sample"] sample.sample_yaml_cwl = _get_yaml_path( - namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_sample_cwl" + namespaces, "write_sample_yaml_cwl", SAMPLE_CWL_YAML_PATH_KEY, "_sample_cwl" ) if "input_schema" in namespaces["pipeline"]: @@ -154,7 +156,7 @@ def write_sample_yaml(namespaces): """ sample = namespaces["sample"] sample["sample_yaml_path"] = _get_yaml_path( - namespaces, SAMPLE_YAML_PATH_KEY, "_sample" + namespaces, "write_sample_yaml", SAMPLE_YAML_PATH_KEY, "_sample" ) sample.to_yaml(sample["sample_yaml_path"], add_prj_ref=False) return {"sample": sample} From e8f8801100ed693d76988e836eb2c4d25e5c8fe8 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 19 Jun 2024 09:34:48 -0400 Subject: [PATCH 15/39] Revert "fix tests #334" This reverts commit a1640f2d331248fd0ba520b02cb8e4e27b25eaef. --- looper/conductor.py | 38 +++----------------------------------- looper/plugins.py | 10 ++++------ 2 files changed, 7 insertions(+), 41 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index 194d6292..5f71c9be 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -35,21 +35,13 @@ _LOGGER = logging.getLogger(__name__) -def _get_yaml_path( - namespaces, - template_key, - template_subkey=None, - default_name_appendix="", - filename=None, -): +def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename=None): """ Get a path to a YAML file for the sample. :param dict[dict]] namespaces: namespaces mapping :param str template_key: the name of the key in 'var_templates' piface section that points to a template to render to get the - user-provided target YAML path OR it is a 1st level key. - :param str template_subkey: the name of the key nested under template_key that that points to a template to render to get the user-provided target YAML path :param str default_name_appendix: a string to append to insert in target YAML file name: '{sample.sample_name}<>.yaml' @@ -60,33 +52,11 @@ def _get_yaml_path( if ( VAR_TEMPL_KEY in namespaces["pipeline"] and template_key in namespaces["pipeline"][VAR_TEMPL_KEY] - and template_subkey in namespaces["pipeline"][VAR_TEMPL_KEY][template_key] ): _LOGGER.debug(f"Sample namespace: {namespaces['sample']}") x = jinja_render_template_strictly("{sample.sample_name}", namespaces) _LOGGER.debug(f"x: {x}") - cpy = namespaces["pipeline"][VAR_TEMPL_KEY][template_key][template_subkey] - _LOGGER.debug(f"cpy: {cpy}") - path = expandpath(jinja_render_template_strictly(cpy, namespaces)) - _LOGGER.debug(f"path: {path}") - - if not path.endswith(SAMPLE_YAML_EXT) and not filename: - raise ValueError( - f"{template_key} is not a valid target YAML file path. " - f"It needs to end with: {' or '.join(SAMPLE_YAML_EXT)}" - ) - final_path = os.path.join(path, filename) if filename else path - if not os.path.exists(os.path.dirname(final_path)): - os.makedirs(os.path.dirname(final_path), exist_ok=True) - - elif ( - VAR_TEMPL_KEY in namespaces["pipeline"] - and template_subkey in namespaces["pipeline"][VAR_TEMPL_KEY] - ): - _LOGGER.debug(f"Sample namespace: {namespaces['sample']}") - x = jinja_render_template_strictly("{sample.sample_name}", namespaces) - _LOGGER.debug(f"x: {x}") - cpy = namespaces["pipeline"][VAR_TEMPL_KEY][template_subkey] + cpy = namespaces["pipeline"][VAR_TEMPL_KEY][template_key] _LOGGER.debug(f"cpy: {cpy}") path = expandpath(jinja_render_template_strictly(cpy, namespaces)) _LOGGER.debug(f"path: {path}") @@ -146,9 +116,7 @@ def write_submission_yaml(namespaces): :param dict namespaces: variable namespaces dict :return dict: sample namespace dict """ - path = _get_yaml_path( - namespaces, "write_submission_yaml", SAMPLE_CWL_YAML_PATH_KEY, "_submission" - ) + path = _get_yaml_path(namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_submission") my_namespaces = {} for namespace, values in namespaces.items(): my_namespaces.update({str(namespace): dict(values)}) diff --git a/looper/plugins.py b/looper/plugins.py index a4fee0d3..dc34283e 100644 --- a/looper/plugins.py +++ b/looper/plugins.py @@ -20,9 +20,7 @@ def write_sample_yaml_prj(namespaces): """ sample = namespaces["sample"] sample.to_yaml( - _get_yaml_path( - namespaces, "write_sample_yaml_prj", SAMPLE_YAML_PRJ_PATH_KEY, "_sample_prj" - ), + _get_yaml_path(namespaces, SAMPLE_YAML_PRJ_PATH_KEY, "_sample_prj"), add_prj_ref=True, ) return {"sample": sample} @@ -56,7 +54,7 @@ def load_template(pipeline): tpl = load_template(namespaces["pipeline"]) content = tpl.render(namespaces) - pth = _get_yaml_path(namespaces, "custom_template_output", None, "config") + pth = _get_yaml_path(namespaces, "custom_template_output", "config") namespaces["sample"]["custom_template_output"] = pth with open(pth, "wb") as fh: # print(content) @@ -97,7 +95,7 @@ def _get_schema_source( # File and Directory object types directly. sample = namespaces["sample"] sample.sample_yaml_cwl = _get_yaml_path( - namespaces, "write_sample_yaml_cwl", SAMPLE_CWL_YAML_PATH_KEY, "_sample_cwl" + namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_sample_cwl" ) if "input_schema" in namespaces["pipeline"]: @@ -156,7 +154,7 @@ def write_sample_yaml(namespaces): """ sample = namespaces["sample"] sample["sample_yaml_path"] = _get_yaml_path( - namespaces, "write_sample_yaml", SAMPLE_YAML_PATH_KEY, "_sample" + namespaces, SAMPLE_YAML_PATH_KEY, "_sample" ) sample.to_yaml(sample["sample_yaml_path"], add_prj_ref=False) return {"sample": sample} From 134dd8251f77c91bec39e4f405cbc1cae9296bda Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 19 Jun 2024 10:40:27 -0400 Subject: [PATCH 16/39] add recursive functions #334 --- looper/conductor.py | 18 +++++++++--------- looper/pipeline_interface.py | 11 ++--------- looper/utils.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index 5f71c9be..93598431 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -28,7 +28,11 @@ from .const import * from .exceptions import JobSubmissionException from .processed_project import populate_sample_paths -from .utils import fetch_sample_flags, jinja_render_template_strictly +from .utils import ( + fetch_sample_flags, + jinja_render_template_strictly, + expand_nested_var_templates, +) from .const import PipelineLevel @@ -717,14 +721,10 @@ def write_script(self, pool, size): _LOGGER.debug(f"namespace pipelines: { pl_iface }") namespaces["pipeline"]["var_templates"] = pl_iface[VAR_TEMPL_KEY] or {} - for k, v in namespaces["pipeline"]["var_templates"].items(): - if isinstance(v, dict): - for key, value in v.items(): - namespaces["pipeline"]["var_templates"][k][key] = ( - jinja_render_template_strictly(value, namespaces) - ) - else: - namespaces["pipeline"]["var_templates"][k] = expandpath(v) + + namespaces["pipeline"]["var_templates"] = expand_nested_var_templates( + namespaces["pipeline"]["var_templates"], namespaces + ) # pre_submit hook namespace updates namespaces = _exec_pre_submit(pl_iface, namespaces) diff --git a/looper/pipeline_interface.py b/looper/pipeline_interface.py index 8674f3bb..1064f20f 100644 --- a/looper/pipeline_interface.py +++ b/looper/pipeline_interface.py @@ -17,7 +17,7 @@ InvalidResourceSpecificationException, PipelineInterfaceConfigError, ) -from .utils import jinja_render_template_strictly +from .utils import jinja_render_template_strictly, render_nested_var_templates __author__ = "Michal Stolarczyk" __email__ = "michal@virginia.edu" @@ -89,14 +89,7 @@ def render_var_templates(self, namespaces): var_templates = {} if curr_data: var_templates.update(curr_data) - for k, v in var_templates.items(): - if isinstance(v, dict): - for key, value in v.items(): - var_templates[k][key] = jinja_render_template_strictly( - value, namespaces - ) - else: - var_templates[k] = jinja_render_template_strictly(v, namespaces) + var_templates = render_nested_var_templates(var_templates, namespaces) return var_templates def get_pipeline_schemas(self, schema_key=INPUT_SCHEMA_KEY): diff --git a/looper/utils.py b/looper/utils.py index fce5fa88..a1b5f67a 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -825,3 +825,33 @@ def inspect_looper_config_file(looper_config_dict) -> None: print("LOOPER INSPECT") for key, value in looper_config_dict.items(): print(f"{key} {value}") + + +def expand_nested_var_templates(var_templates_dict, namespaces): + + "Takes all var_templates as a dict and recursively expands any paths." + + result = {} + + for k, v in var_templates_dict.items(): + if isinstance(v, dict): + result[k] = expand_nested_var_templates(v, namespaces) + else: + result[k] = expandpath(v) + + return result + + +def render_nested_var_templates(var_templates_dict, namespaces): + + "Takes all var_templates as a dict and recursively renders the jinja templates." + + result = {} + + for k, v in var_templates_dict.items(): + if isinstance(v, dict): + result[k] = expand_nested_var_templates(v, namespaces) + else: + result[k] = jinja_render_template_strictly(v, namespaces) + + return result From 93e8d31848658cc45b8c76199bc63ea4c78d4465 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 19 Jun 2024 11:34:47 -0400 Subject: [PATCH 17/39] use deep_update for sample modifiers, bump ubiquerg version req --- looper/project.py | 10 ++++++---- requirements/requirements-all.txt | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/looper/project.py b/looper/project.py index e070233e..f8a4c7d6 100644 --- a/looper/project.py +++ b/looper/project.py @@ -20,7 +20,7 @@ from peppy import Project as peppyProject from peppy.utils import make_abs_via_cfg from pipestat import PipestatError, PipestatManager -from ubiquerg import expandpath, is_command_callable +from ubiquerg import expandpath, is_command_callable, deep_update from yacman import YAMLConfigManager from .conductor import write_pipestat_config @@ -138,10 +138,12 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): # add sample pipeline interface to the project if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): _LOGGER.warning( - "Sample modifiers were provided in Looper config. Overwriting those in PEP." + "Sample modifiers were provided in Looper Config and in PEP Project Config. Merging..." + ) + deep_update(self.config["sample_modifiers"], kwargs.get(SAMPLE_MODS_KEY)) + _LOGGER.debug( + msg=f"Merged sample modifiers: {self.config['sample_modifiers']}" ) - self.config.setdefault("sample_modifiers", {}) - self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) elif kwargs.get(SAMPLE_MODS_KEY): self.config.setdefault("sample_modifiers", {}) self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index bd28baa8..b7fd60ad 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -9,7 +9,7 @@ pipestat>=0.9.2 peppy>=0.40.2 pyyaml>=3.12 rich>=9.10.0 -ubiquerg>=0.5.2 +ubiquerg>=0.8.1 yacman==0.9.3 pydantic2-argparse>=0.9.2 psutil \ No newline at end of file From 2bbafd237d16678f6bc1c05a3806c0f3f436f73f Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 19 Jun 2024 11:43:08 -0400 Subject: [PATCH 18/39] fix ubiquerg version req --- requirements/requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index b7fd60ad..0a6960b0 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -9,7 +9,7 @@ pipestat>=0.9.2 peppy>=0.40.2 pyyaml>=3.12 rich>=9.10.0 -ubiquerg>=0.8.1 +ubiquerg>=0.8.1a1 yacman==0.9.3 pydantic2-argparse>=0.9.2 psutil \ No newline at end of file From c735d1eff54f19889f96b7876d5f39970c45adba Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 19 Jun 2024 15:20:07 -0400 Subject: [PATCH 19/39] add cli_modifiers and update prioritizing looper config entries --- looper/command_models/arguments.py | 6 +++--- looper/command_models/commands.py | 2 +- looper/utils.py | 31 +++++++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index cd89845d..b776aeae 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -286,8 +286,8 @@ class ArgumentEnum(enum.Enum): default=(dict, None), description="sample modifiers. Note: add these via looper config file. Not the CLI.", ) - PROJECT_MODIFIERS = Argument( - name="project_modifiers", + CLI_MODIFIERS = Argument( + name="cli_modifiers", default=(dict, None), - description="project modifiers. Note: add these via looper config file. Not the CLI", + description="cli modifiers. Note: add these via looper config file. Not the CLI", ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 29080201..9c9a6691 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -62,7 +62,7 @@ def create_model(self) -> Type[pydantic.BaseModel]: ArgumentEnum.AMEND.value, ArgumentEnum.PROJECT_LEVEL.value, ArgumentEnum.SAMPLE_MODIFIERS.value, - ArgumentEnum.PROJECT_MODIFIERS.value, + ArgumentEnum.CLI_MODIFIERS.value, ] RunParser = Command( diff --git a/looper/utils.py b/looper/utils.py index a1b5f67a..f6e589d3 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -14,7 +14,7 @@ import yaml from peppy import Project as peppyProject from peppy.const import * -from ubiquerg import convert_value, expandpath, parse_registry_path +from ubiquerg import convert_value, expandpath, parse_registry_path, deep_update from pephubclient.constants import RegistryPath from pydantic import ValidationError @@ -270,6 +270,31 @@ def enrich_args_via_cfg(subcommand_name, parser_args, aux_parser, test_args=None if os.path.exists(parser_args.config_file) else dict() ) + + # If user provided project-level modifiers in the looper config, they are prioritized + if cfg_args_all: + for key, value in cfg_args_all.items(): + if getattr(parser_args, key, None): + new_value = getattr(parser_args, key) + cfg_args_all[key] = new_value + + looper_config_cli_modifiers = None + if getattr(parser_args, "cli_modifiers", None): + if str(subcommand_name) in parser_args.cli_modifiers: + looper_config_cli_modifiers = parser_args.cli_modifiers[subcommand_name] + looper_config_cli_modifiers = ( + {k.replace("-", "_"): v for k, v in looper_config_cli_modifiers.items()} + if looper_config_cli_modifiers + else None + ) + + if looper_config_cli_modifiers: + _LOGGER.warning( + "CLI modifiers were provided in Looper Config and in PEP Project Config. Merging..." + ) + deep_update(cfg_args_all, looper_config_cli_modifiers) + _LOGGER.debug(msg=f"Merged CLI modifiers: {cfg_args_all}") + result = argparse.Namespace() if test_args: cli_args, _ = aux_parser.parse_known_args(args=test_args) @@ -546,8 +571,8 @@ def read_looper_config_file(looper_config_path: str) -> dict: if SAMPLE_MODS_KEY in dp_data: return_dict[SAMPLE_MODS_KEY] = dp_data[SAMPLE_MODS_KEY] - if PROJ_MODS_KEY in dp_data: - return_dict[PROJ_MODS_KEY] = dp_data[PROJ_MODS_KEY] + if "cli_modifiers" in dp_data: + return_dict["cli_modifiers"] = dp_data["cli_modifiers"] if PIPELINE_INTERFACES_KEY in dp_data: dp_data.setdefault(PIPELINE_INTERFACES_KEY, {}) From ca23f397e71b87bfaf68579a3509880eb50a9365 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:21:34 -0400 Subject: [PATCH 20/39] remove sample_modifiers and cli_modifiers Arguments from CLI. --- looper/cli_pydantic.py | 19 +++++++++++++++---- looper/command_models/arguments.py | 10 ---------- looper/command_models/commands.py | 2 -- looper/const.py | 7 ++++--- looper/utils.py | 14 ++++++++++---- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 764c311c..a3d0728f 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -151,10 +151,15 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): looper_config_dict = read_looper_dotfile() _LOGGER.info(f"Using looper config ({looper_cfg_path}).") - # subcommand_args.sample_modifiers = {} - # subcommand_args.project_modifiers = {} + sample_modifiers_dict = None + cli_modifiers_dict = None for looper_config_key, looper_config_item in looper_config_dict.items(): - setattr(subcommand_args, looper_config_key, looper_config_item) + if looper_config_key == SAMPLE_MODS_KEY: + sample_modifiers_dict = looper_config_item + elif looper_config_key == CLI_MODS_KEY: + cli_modifiers_dict = looper_config_item + else: + setattr(subcommand_args, looper_config_key, looper_config_item) except OSError: parser.print_help(sys.stderr) @@ -170,7 +175,11 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): ) subcommand_args = enrich_args_via_cfg( - subcommand_name, subcommand_args, parser, test_args=test_args + subcommand_name, + subcommand_args, + parser, + test_args=test_args, + cli_modifiers=cli_modifiers_dict, ) # If project pipeline interface defined in the cli, change name to: "pipeline_interface" @@ -196,6 +205,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): amendments=subcommand_args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", + sample_modifiers=sample_modifiers_dict, **{ attr: getattr(subcommand_args, attr) for attr in CLI_PROJ_ATTRS @@ -211,6 +221,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): amendments=subcommand_args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", + sample_modifiers=sample_modifiers_dict, project_dict=PEPHubClient()._load_raw_pep( registry_path=subcommand_args.config_file ), diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index b776aeae..8c484d33 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -281,13 +281,3 @@ class ArgumentEnum(enum.Enum): default=(bool, False), description="Is this command executed for project-level?", ) - SAMPLE_MODIFIERS = Argument( - name="sample_modifiers", - default=(dict, None), - description="sample modifiers. Note: add these via looper config file. Not the CLI.", - ) - CLI_MODIFIERS = Argument( - name="cli_modifiers", - default=(dict, None), - description="cli modifiers. Note: add these via looper config file. Not the CLI", - ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 9c9a6691..233cfd0b 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -61,8 +61,6 @@ def create_model(self) -> Type[pydantic.BaseModel]: ArgumentEnum.SETTINGS.value, ArgumentEnum.AMEND.value, ArgumentEnum.PROJECT_LEVEL.value, - ArgumentEnum.SAMPLE_MODIFIERS.value, - ArgumentEnum.CLI_MODIFIERS.value, ] RunParser = Command( diff --git a/looper/const.py b/looper/const.py index d6449f2b..e25f911f 100644 --- a/looper/const.py +++ b/looper/const.py @@ -2,7 +2,7 @@ import os from enum import Enum -from peppy.const import SAMPLE_MODS_KEY, PROJ_MODS_KEY +from peppy.const import SAMPLE_MODS_KEY __author__ = "Databio lab" __email__ = "nathan@code.databio.org" @@ -94,6 +94,7 @@ "DEBUG_EIDO_VALIDATION", "LOOPER_GENERIC_OUTPUT_SCHEMA", "LOOPER_GENERIC_COUNT_LINES", + "CLI_MODS_KEY", ] FLAGS = ["completed", "running", "failed", "waiting", "partial"] @@ -223,6 +224,8 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): SAMPLE_PL_ARG = "sample_pipeline_interfaces" PROJECT_PL_ARG = "project_pipeline_interfaces" +CLI_MODS_KEY = "cli_modifiers" + DEFAULT_CFG_PATH = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME) CLI_PROJ_ATTRS = [ @@ -238,8 +241,6 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): PIPESTAT_KEY, DEFAULT_PIPESTAT_CONFIG_ATTR, PEP_CONFIG_KEY, - SAMPLE_MODS_KEY, - PROJ_MODS_KEY, ] # resource package TSV-related consts diff --git a/looper/utils.py b/looper/utils.py index f6e589d3..55417250 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -253,7 +253,13 @@ def read_yaml_file(filepath): return data -def enrich_args_via_cfg(subcommand_name, parser_args, aux_parser, test_args=None): +def enrich_args_via_cfg( + subcommand_name, + parser_args, + aux_parser, + test_args=None, + cli_modifiers=None, +): """ Read in a looper dotfile and set arguments. @@ -279,9 +285,9 @@ def enrich_args_via_cfg(subcommand_name, parser_args, aux_parser, test_args=None cfg_args_all[key] = new_value looper_config_cli_modifiers = None - if getattr(parser_args, "cli_modifiers", None): - if str(subcommand_name) in parser_args.cli_modifiers: - looper_config_cli_modifiers = parser_args.cli_modifiers[subcommand_name] + if cli_modifiers: + if str(subcommand_name) in cli_modifiers: + looper_config_cli_modifiers = cli_modifiers[subcommand_name] looper_config_cli_modifiers = ( {k.replace("-", "_"): v for k, v in looper_config_cli_modifiers.items()} if looper_config_cli_modifiers From 377db1946433c2ae719058674191e534dc4f55f0 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:31:17 -0400 Subject: [PATCH 21/39] update comments to be more accurate --- looper/project.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/looper/project.py b/looper/project.py index f8a4c7d6..6487513c 100644 --- a/looper/project.py +++ b/looper/project.py @@ -135,7 +135,7 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): except NotImplementedError: self.name = None - # add sample pipeline interface to the project + # consolidate sample modifiers if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): _LOGGER.warning( "Sample modifiers were provided in Looper Config and in PEP Project Config. Merging..." @@ -148,6 +148,7 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): self.config.setdefault("sample_modifiers", {}) self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) + # add sample pipeline interface to the project if kwargs.get(SAMPLE_PL_ARG): self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG)) From ed828d68b2f35fc63b02c3885232bde16c4f31b3 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:35:57 -0400 Subject: [PATCH 22/39] use CLI_MODS_KEY --- looper/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/looper/utils.py b/looper/utils.py index 55417250..9cc8eeaa 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -577,8 +577,8 @@ def read_looper_config_file(looper_config_path: str) -> dict: if SAMPLE_MODS_KEY in dp_data: return_dict[SAMPLE_MODS_KEY] = dp_data[SAMPLE_MODS_KEY] - if "cli_modifiers" in dp_data: - return_dict["cli_modifiers"] = dp_data["cli_modifiers"] + if CLI_MODS_KEY in dp_data: + return_dict[CLI_MODS_KEY] = dp_data[CLI_MODS_KEY] if PIPELINE_INTERFACES_KEY in dp_data: dp_data.setdefault(PIPELINE_INTERFACES_KEY, {}) From f93a8e299597466645790ff5444f2c70670c5f52 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:43:57 -0400 Subject: [PATCH 23/39] update changelog --- docs/changelog.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 24c3b9b8..58d07f9b 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -3,6 +3,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [1.9.0] -- 2024-07-xx + +### Added +- user can now add sample modifiers and cli modifiers to looper config instead of PEP project [#270](https://github.com/pepkit/looper/issues/270) + ## [1.8.1] -- 2024-06-06 ### Fixed From 4bba529da7355f2c514ed776dba4def74e8bbed3 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:51:16 -0400 Subject: [PATCH 24/39] revert to using original pydantic-argparse instead of using forked version --- looper/command_models/commands.py | 2 +- requirements/requirements-all.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 233cfd0b..e764c99d 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -9,7 +9,7 @@ from ..const import MESSAGE_BY_SUBCOMMAND from .arguments import Argument, ArgumentEnum -from pydantic2_argparse import ArgumentParser +from pydantic_argparse import ArgumentParser @dataclass diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 0a6960b0..dd40fd1d 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -11,5 +11,5 @@ pyyaml>=3.12 rich>=9.10.0 ubiquerg>=0.8.1a1 yacman==0.9.3 -pydantic2-argparse>=0.9.2 +pydantic-argparse>=0.9.0 psutil \ No newline at end of file From 6ba543c266c7d3db0cceaf0f4fa097a4199896bb Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:16:58 -0400 Subject: [PATCH 25/39] infer pipeline_type directly from interface #465 --- looper/const.py | 1 + looper/utils.py | 64 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/looper/const.py b/looper/const.py index e25f911f..017aceb4 100644 --- a/looper/const.py +++ b/looper/const.py @@ -95,6 +95,7 @@ "LOOPER_GENERIC_OUTPUT_SCHEMA", "LOOPER_GENERIC_COUNT_LINES", "CLI_MODS_KEY", + "PipelineLevel", ] FLAGS = ["completed", "running", "failed", "waiting", "partial"] diff --git a/looper/utils.py b/looper/utils.py index 9cc8eeaa..6e052f96 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -17,6 +17,7 @@ from ubiquerg import convert_value, expandpath, parse_registry_path, deep_update from pephubclient.constants import RegistryPath from pydantic import ValidationError +from yacman import load_yaml from .const import * from .command_models.commands import SUPPORTED_COMMANDS @@ -534,6 +535,33 @@ def initiate_looper_config( return True +def determine_pipeline_type(piface_path: str, looper_config_path: str): + """ + Read pipeline interface from disk and determine if pipeline type is sample or project-level + + + :param str piface_path: path to pipeline_interface + :param str looper_config_path: path to looper config file + :return Tuple[Union[str,None],Union[str,None]] : (pipeline type, resolved path) or (None, None) + """ + + if piface_path is None: + return None, None + piface_path = expandpath(piface_path) + if not os.path.isabs(piface_path): + piface_path = os.path.realpath( + os.path.join(os.path.dirname(looper_config_path), piface_path) + ) + try: + piface_dict = load_yaml(piface_path) + except FileNotFoundError: + return None, None + + pipeline_type = piface_dict.get("pipeline_type", None) + + return pipeline_type, piface_path + + def read_looper_config_file(looper_config_path: str) -> dict: """ Read Looper config file which includes: @@ -581,11 +609,39 @@ def read_looper_config_file(looper_config_path: str) -> dict: return_dict[CLI_MODS_KEY] = dp_data[CLI_MODS_KEY] if PIPELINE_INTERFACES_KEY in dp_data: + dp_data.setdefault(PIPELINE_INTERFACES_KEY, {}) - return_dict[SAMPLE_PL_ARG] = dp_data.get(PIPELINE_INTERFACES_KEY).get("sample") - return_dict[PROJECT_PL_ARG] = dp_data.get(PIPELINE_INTERFACES_KEY).get( - "project" - ) + + if isinstance(dp_data.get(PIPELINE_INTERFACES_KEY), dict) and ( + dp_data.get(PIPELINE_INTERFACES_KEY).get("sample") + or dp_data.get(PIPELINE_INTERFACES_KEY).get("project") + ): + # Support original nesting of pipeline interfaces under "sample" and "project" + return_dict[SAMPLE_PL_ARG] = dp_data.get(PIPELINE_INTERFACES_KEY).get( + "sample" + ) + return_dict[PROJECT_PL_ARG] = dp_data.get(PIPELINE_INTERFACES_KEY).get( + "project" + ) + else: + # infer pipeline type based from interface instead of nested keys: https://github.com/pepkit/looper/issues/465 + all_pipeline_interfaces = dp_data.get(PIPELINE_INTERFACES_KEY) + sample_pifaces = [] + project_pifaces = [] + if isinstance(all_pipeline_interfaces, str): + all_pipeline_interfaces = [all_pipeline_interfaces] + for piface in all_pipeline_interfaces: + pipeline_type, piface_path = determine_pipeline_type( + piface, looper_config_path + ) + if pipeline_type == PipelineLevel.SAMPLE.value: + sample_pifaces.append(piface_path) + elif pipeline_type == PipelineLevel.PROJECT.value: + project_pifaces.append(piface_path) + if len(sample_pifaces) > 0: + return_dict[SAMPLE_PL_ARG] = sample_pifaces + if len(project_pifaces) > 0: + return_dict[PROJECT_PL_ARG] = project_pifaces else: _LOGGER.warning( From 0edd358c30bcb1391ed05f0a77815cc89df43da9 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:20:32 -0400 Subject: [PATCH 26/39] fix pydantic argparse typo --- looper/cli_pydantic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index a3d0728f..a4a61e69 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -25,7 +25,7 @@ import yaml from eido import inspect_project from pephubclient import PEPHubClient -from pydantic2_argparse.argparse.parser import ArgumentParser +from pydantic_argparse.argparse.parser import ArgumentParser from divvy import select_divvy_config From 04fa363405fbca1afaaddfd486d3bdbea0340b4e Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:25:45 -0400 Subject: [PATCH 27/39] try again fix pydantic argparse typo --- looper/cli_pydantic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index a4a61e69..31fd7577 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -21,7 +21,7 @@ import sys import logmuse -import pydantic2_argparse +import pydantic_argparse import yaml from eido import inspect_project from pephubclient import PEPHubClient @@ -338,7 +338,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): def main(test_args=None) -> None: - parser = pydantic2_argparse.ArgumentParser( + parser = pydantic_argparse.ArgumentParser( model=TopLevelParser, prog="looper", description="Looper: A job submitter for Portable Encapsulated Projects", From aa6eb77543240672b3d12c2ddbe195670d4efeca Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:41:02 -0400 Subject: [PATCH 28/39] update changelog --- docs/changelog.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 58d07f9b..ce4c313e 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -3,10 +3,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [1.9.0] -- 2024-07-xx +## [2.0.0] -- 2024-07-xx ### Added - user can now add sample modifiers and cli modifiers to looper config instead of PEP project [#270](https://github.com/pepkit/looper/issues/270) +- pipeline interfaces no longer must be nested under sample and project keys within looper config file [#465](https://github.com/pepkit/looper/issues/465) ## [1.8.1] -- 2024-06-06 From 7b2c229ad505f2ee2216f78296a2ed974ad23679 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 21 Jun 2024 15:51:54 -0400 Subject: [PATCH 29/39] clean up imports #476 --- looper/cli_pydantic.py | 2 -- looper/const.py | 1 - looper/divvy.py | 6 ------ looper/looper.py | 2 -- looper/pipeline_interface.py | 2 +- looper/project.py | 8 ++------ looper/utils.py | 5 ++--- 7 files changed, 5 insertions(+), 21 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 31fd7577..9e5aca16 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -17,7 +17,6 @@ # with types. from __future__ import annotations -import os import sys import logmuse @@ -29,7 +28,6 @@ from divvy import select_divvy_config -from .const import PipelineLevel from . import __version__ from .command_models.arguments import ArgumentEnum diff --git a/looper/const.py b/looper/const.py index 017aceb4..e38cee18 100644 --- a/looper/const.py +++ b/looper/const.py @@ -2,7 +2,6 @@ import os from enum import Enum -from peppy.const import SAMPLE_MODS_KEY __author__ = "Databio lab" __email__ = "nathan@code.databio.org" diff --git a/looper/divvy.py b/looper/divvy.py index bd880f94..38dd8fe2 100644 --- a/looper/divvy.py +++ b/looper/divvy.py @@ -1,18 +1,13 @@ """ Computing configuration representation """ import logging -import logmuse import os -import sys import shutil -import yaml from shutil import copytree from yacman import FutureYAMLConfigManager as YAMLConfigManager from yacman import write_lock, FILEPATH_KEY, load_yaml, select_config -from yaml import SafeLoader -from ubiquerg import is_writable, VersionInHelpParser from .const import ( @@ -24,7 +19,6 @@ ) from .utils import write_submit_script -# from . import __version__ _LOGGER = logging.getLogger(__name__) diff --git a/looper/looper.py b/looper/looper.py index 1eea6edd..18f0d9ed 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -33,14 +33,12 @@ from rich.console import Console from rich.table import Table from ubiquerg.cli_tools import query_yes_no -from ubiquerg.collection import uniqify from .conductor import SubmissionConductor from .exceptions import * from .const import * -from .pipeline_interface import PipelineInterface from .project import Project from .utils import ( desired_samples_range_skipped, diff --git a/looper/pipeline_interface.py b/looper/pipeline_interface.py index 1064f20f..f796354a 100644 --- a/looper/pipeline_interface.py +++ b/looper/pipeline_interface.py @@ -17,7 +17,7 @@ InvalidResourceSpecificationException, PipelineInterfaceConfigError, ) -from .utils import jinja_render_template_strictly, render_nested_var_templates +from .utils import render_nested_var_templates __author__ = "Michal Stolarczyk" __email__ = "michal@virginia.edu" diff --git a/looper/project.py b/looper/project.py index 6487513c..6daecac1 100644 --- a/looper/project.py +++ b/looper/project.py @@ -10,18 +10,14 @@ except ImportError: # cached_property was introduced in python 3.8 cached_property = property -from logging import getLogger from .divvy import ComputingConfiguration from eido import PathAttrNotFoundError, read_schema from jsonschema import ValidationError from pandas.core.common import flatten -from peppy import CONFIG_KEY, OUTDIR_KEY -from peppy import Project as peppyProject from peppy.utils import make_abs_via_cfg -from pipestat import PipestatError, PipestatManager -from ubiquerg import expandpath, is_command_callable, deep_update -from yacman import YAMLConfigManager +from pipestat import PipestatManager + from .conductor import write_pipestat_config from .exceptions import * diff --git a/looper/utils.py b/looper/utils.py index 6e052f96..26776db7 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -1,12 +1,11 @@ """ Helpers without an obvious logical home. """ import argparse -from collections import defaultdict, namedtuple +from collections import defaultdict import glob import itertools from logging import getLogger import os -import sys from typing import * import re @@ -21,7 +20,7 @@ from .const import * from .command_models.commands import SUPPORTED_COMMANDS -from .exceptions import MisconfigurationException, RegistryPathException +from .exceptions import MisconfigurationException _LOGGER = getLogger(__name__) From b46543cd91337d6a46dda1beac01bb3bb71eb3c4 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:36:06 -0400 Subject: [PATCH 30/39] cli_modifiers key reverted to `cli` --- looper/const.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/const.py b/looper/const.py index e38cee18..0243dc88 100644 --- a/looper/const.py +++ b/looper/const.py @@ -224,7 +224,7 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): SAMPLE_PL_ARG = "sample_pipeline_interfaces" PROJECT_PL_ARG = "project_pipeline_interfaces" -CLI_MODS_KEY = "cli_modifiers" +CLI_MODS_KEY = "cli" DEFAULT_CFG_PATH = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME) From 3175f27f18b595b8b1634261990c5c63551d39d5 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:16:24 -0400 Subject: [PATCH 31/39] fix bug with None type for non-existing project arguments --- looper/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/looper/utils.py b/looper/utils.py index 26776db7..eaeb0e2e 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -283,6 +283,8 @@ def enrich_args_via_cfg( if getattr(parser_args, key, None): new_value = getattr(parser_args, key) cfg_args_all[key] = new_value + else: + cfg_args_all = {} looper_config_cli_modifiers = None if cli_modifiers: From 2d6230ac71af4bdbc37fa0ccec75e16d4a39c99e Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:19:19 -0400 Subject: [PATCH 32/39] Refactor CLI_MODS_KEY to CLI_KEY --- looper/cli_pydantic.py | 2 +- looper/const.py | 2 -- looper/utils.py | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 9e5aca16..1da526e5 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -154,7 +154,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): for looper_config_key, looper_config_item in looper_config_dict.items(): if looper_config_key == SAMPLE_MODS_KEY: sample_modifiers_dict = looper_config_item - elif looper_config_key == CLI_MODS_KEY: + elif looper_config_key == CLI_KEY: cli_modifiers_dict = looper_config_item else: setattr(subcommand_args, looper_config_key, looper_config_item) diff --git a/looper/const.py b/looper/const.py index 0243dc88..bf40954d 100644 --- a/looper/const.py +++ b/looper/const.py @@ -93,7 +93,6 @@ "DEBUG_EIDO_VALIDATION", "LOOPER_GENERIC_OUTPUT_SCHEMA", "LOOPER_GENERIC_COUNT_LINES", - "CLI_MODS_KEY", "PipelineLevel", ] @@ -224,7 +223,6 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): SAMPLE_PL_ARG = "sample_pipeline_interfaces" PROJECT_PL_ARG = "project_pipeline_interfaces" -CLI_MODS_KEY = "cli" DEFAULT_CFG_PATH = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME) diff --git a/looper/utils.py b/looper/utils.py index eaeb0e2e..4eb8ca4b 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -606,8 +606,8 @@ def read_looper_config_file(looper_config_path: str) -> dict: if SAMPLE_MODS_KEY in dp_data: return_dict[SAMPLE_MODS_KEY] = dp_data[SAMPLE_MODS_KEY] - if CLI_MODS_KEY in dp_data: - return_dict[CLI_MODS_KEY] = dp_data[CLI_MODS_KEY] + if CLI_KEY in dp_data: + return_dict[CLI_KEY] = dp_data[CLI_KEY] if PIPELINE_INTERFACES_KEY in dp_data: From 9d253e42de48e56496969952e1275a18aa633ac4 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 24 Jun 2024 16:35:07 -0400 Subject: [PATCH 33/39] attempt to create modified PEP via dictionary --- looper/cli_pydantic.py | 10 ++++----- looper/const.py | 3 ++- looper/project.py | 48 +++++++++++++++++++++++++++++++----------- looper/utils.py | 4 ++-- 4 files changed, 45 insertions(+), 20 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 1da526e5..e979b6eb 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -149,11 +149,11 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): looper_config_dict = read_looper_dotfile() _LOGGER.info(f"Using looper config ({looper_cfg_path}).") - sample_modifiers_dict = None + pep_update_dict = None cli_modifiers_dict = None for looper_config_key, looper_config_item in looper_config_dict.items(): - if looper_config_key == SAMPLE_MODS_KEY: - sample_modifiers_dict = looper_config_item + if looper_config_key == PEP_UPDATE_KEY: + pep_update_dict = looper_config_item elif looper_config_key == CLI_KEY: cli_modifiers_dict = looper_config_item else: @@ -203,7 +203,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): amendments=subcommand_args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", - sample_modifiers=sample_modifiers_dict, + pep_update=pep_update_dict, **{ attr: getattr(subcommand_args, attr) for attr in CLI_PROJ_ATTRS @@ -219,7 +219,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): amendments=subcommand_args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", - sample_modifiers=sample_modifiers_dict, + pep_update=pep_update_dict, project_dict=PEPHubClient()._load_raw_pep( registry_path=subcommand_args.config_file ), diff --git a/looper/const.py b/looper/const.py index bf40954d..9e39f506 100644 --- a/looper/const.py +++ b/looper/const.py @@ -94,6 +94,7 @@ "LOOPER_GENERIC_OUTPUT_SCHEMA", "LOOPER_GENERIC_COUNT_LINES", "PipelineLevel", + "PEP_UPDATE_KEY", ] FLAGS = ["completed", "running", "failed", "waiting", "partial"] @@ -222,7 +223,7 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): ALL_SUBCMD_KEY = "all" SAMPLE_PL_ARG = "sample_pipeline_interfaces" PROJECT_PL_ARG = "project_pipeline_interfaces" - +PEP_UPDATE_KEY = "pep_update" DEFAULT_CFG_PATH = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME) diff --git a/looper/project.py b/looper/project.py index 6daecac1..2e7f4c65 100644 --- a/looper/project.py +++ b/looper/project.py @@ -111,7 +111,31 @@ class Project(peppyProject): """ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): - super(Project, self).__init__(cfg=cfg, amendments=amendments) + + if kwargs.get("pep_update"): + + _LOGGER.warning( + "Pep update key supplied in looper config. Merging with PEP Project" + ) + + pep_update_dict = kwargs.get("pep_update") + temp_pep = peppyProject(cfg=cfg, amendments=amendments) + dict_obj_pep = temp_pep.to_dict(extended=True) # to make this a real PEP + + if SAMPLE_MODS_KEY in pep_update_dict: + dict_obj_pep.setdefault(SAMPLE_MODS_KEY, {}) + deep_update( + dict_obj_pep[SAMPLE_MODS_KEY], pep_update_dict[SAMPLE_MODS_KEY] + ) + if PROJ_MODS_KEY in pep_update_dict: + dict_obj_pep.setdefault(PROJ_MODS_KEY, {}) + deep_update(dict_obj_pep[PROJ_MODS_KEY], pep_update_dict[PROJ_MODS_KEY]) + + super(Project, self).from_dict(pep_dictionary=dict_obj_pep) + + else: + super(Project, self).__init__(cfg=cfg, amendments=amendments) + prj_dict = kwargs.get("project_dict") pep_config = kwargs.get("pep_config", None) if pep_config: @@ -132,17 +156,17 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): self.name = None # consolidate sample modifiers - if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): - _LOGGER.warning( - "Sample modifiers were provided in Looper Config and in PEP Project Config. Merging..." - ) - deep_update(self.config["sample_modifiers"], kwargs.get(SAMPLE_MODS_KEY)) - _LOGGER.debug( - msg=f"Merged sample modifiers: {self.config['sample_modifiers']}" - ) - elif kwargs.get(SAMPLE_MODS_KEY): - self.config.setdefault("sample_modifiers", {}) - self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) + # if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): + # _LOGGER.warning( + # "Sample modifiers were provided in Looper Config and in PEP Project Config. Merging..." + # ) + # deep_update(self.config["sample_modifiers"], kwargs.get(SAMPLE_MODS_KEY)) + # _LOGGER.debug( + # msg=f"Merged sample modifiers: {self.config['sample_modifiers']}" + # ) + # elif kwargs.get(SAMPLE_MODS_KEY): + # self.config.setdefault("sample_modifiers", {}) + # self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) # add sample pipeline interface to the project if kwargs.get(SAMPLE_PL_ARG): diff --git a/looper/utils.py b/looper/utils.py index 4eb8ca4b..af1d80c9 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -603,8 +603,8 @@ def read_looper_config_file(looper_config_path: str) -> dict: if PIPESTAT_KEY in dp_data: return_dict[PIPESTAT_KEY] = dp_data[PIPESTAT_KEY] - if SAMPLE_MODS_KEY in dp_data: - return_dict[SAMPLE_MODS_KEY] = dp_data[SAMPLE_MODS_KEY] + if PEP_UPDATE_KEY in dp_data: + return_dict[PEP_UPDATE_KEY] = dp_data[PEP_UPDATE_KEY] if CLI_KEY in dp_data: return_dict[CLI_KEY] = dp_data[CLI_KEY] From c8e86abdcdf053b514fc46aa9315d25052f0f771 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 25 Jun 2024 11:03:48 -0400 Subject: [PATCH 34/39] fix modifying PEP via _from_dict() --- looper/project.py | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/looper/project.py b/looper/project.py index 2e7f4c65..138873c5 100644 --- a/looper/project.py +++ b/looper/project.py @@ -112,10 +112,12 @@ class Project(peppyProject): def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): + super(Project, self).__init__(cfg=cfg, amendments=amendments) + if kwargs.get("pep_update"): _LOGGER.warning( - "Pep update key supplied in looper config. Merging with PEP Project" + "PEP update key supplied in looper config. Merging with PEP Project" ) pep_update_dict = kwargs.get("pep_update") @@ -131,10 +133,7 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): dict_obj_pep.setdefault(PROJ_MODS_KEY, {}) deep_update(dict_obj_pep[PROJ_MODS_KEY], pep_update_dict[PROJ_MODS_KEY]) - super(Project, self).from_dict(pep_dictionary=dict_obj_pep) - - else: - super(Project, self).__init__(cfg=cfg, amendments=amendments) + self._from_dict(pep_dictionary=dict_obj_pep) prj_dict = kwargs.get("project_dict") pep_config = kwargs.get("pep_config", None) @@ -155,19 +154,6 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): except NotImplementedError: self.name = None - # consolidate sample modifiers - # if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): - # _LOGGER.warning( - # "Sample modifiers were provided in Looper Config and in PEP Project Config. Merging..." - # ) - # deep_update(self.config["sample_modifiers"], kwargs.get(SAMPLE_MODS_KEY)) - # _LOGGER.debug( - # msg=f"Merged sample modifiers: {self.config['sample_modifiers']}" - # ) - # elif kwargs.get(SAMPLE_MODS_KEY): - # self.config.setdefault("sample_modifiers", {}) - # self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) - # add sample pipeline interface to the project if kwargs.get(SAMPLE_PL_ARG): self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG)) From 5edaf40478793da95e7179faf7fcde660554a421 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 25 Jun 2024 14:45:47 -0400 Subject: [PATCH 35/39] Revert "fix modifying PEP via _from_dict()" This reverts commit c8e86abdcdf053b514fc46aa9315d25052f0f771. --- looper/project.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/looper/project.py b/looper/project.py index 138873c5..2e7f4c65 100644 --- a/looper/project.py +++ b/looper/project.py @@ -112,12 +112,10 @@ class Project(peppyProject): def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): - super(Project, self).__init__(cfg=cfg, amendments=amendments) - if kwargs.get("pep_update"): _LOGGER.warning( - "PEP update key supplied in looper config. Merging with PEP Project" + "Pep update key supplied in looper config. Merging with PEP Project" ) pep_update_dict = kwargs.get("pep_update") @@ -133,7 +131,10 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): dict_obj_pep.setdefault(PROJ_MODS_KEY, {}) deep_update(dict_obj_pep[PROJ_MODS_KEY], pep_update_dict[PROJ_MODS_KEY]) - self._from_dict(pep_dictionary=dict_obj_pep) + super(Project, self).from_dict(pep_dictionary=dict_obj_pep) + + else: + super(Project, self).__init__(cfg=cfg, amendments=amendments) prj_dict = kwargs.get("project_dict") pep_config = kwargs.get("pep_config", None) @@ -154,6 +155,19 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): except NotImplementedError: self.name = None + # consolidate sample modifiers + # if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): + # _LOGGER.warning( + # "Sample modifiers were provided in Looper Config and in PEP Project Config. Merging..." + # ) + # deep_update(self.config["sample_modifiers"], kwargs.get(SAMPLE_MODS_KEY)) + # _LOGGER.debug( + # msg=f"Merged sample modifiers: {self.config['sample_modifiers']}" + # ) + # elif kwargs.get(SAMPLE_MODS_KEY): + # self.config.setdefault("sample_modifiers", {}) + # self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) + # add sample pipeline interface to the project if kwargs.get(SAMPLE_PL_ARG): self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG)) From 68a58e0c18593746e0d1eae5ea44de055298e787 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 25 Jun 2024 14:45:47 -0400 Subject: [PATCH 36/39] Revert "attempt to create modified PEP via dictionary" This reverts commit 9d253e42de48e56496969952e1275a18aa633ac4. --- looper/cli_pydantic.py | 10 ++++----- looper/const.py | 3 +-- looper/project.py | 48 +++++++++++------------------------------- looper/utils.py | 4 ++-- 4 files changed, 20 insertions(+), 45 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index e979b6eb..1da526e5 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -149,11 +149,11 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): looper_config_dict = read_looper_dotfile() _LOGGER.info(f"Using looper config ({looper_cfg_path}).") - pep_update_dict = None + sample_modifiers_dict = None cli_modifiers_dict = None for looper_config_key, looper_config_item in looper_config_dict.items(): - if looper_config_key == PEP_UPDATE_KEY: - pep_update_dict = looper_config_item + if looper_config_key == SAMPLE_MODS_KEY: + sample_modifiers_dict = looper_config_item elif looper_config_key == CLI_KEY: cli_modifiers_dict = looper_config_item else: @@ -203,7 +203,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): amendments=subcommand_args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", - pep_update=pep_update_dict, + sample_modifiers=sample_modifiers_dict, **{ attr: getattr(subcommand_args, attr) for attr in CLI_PROJ_ATTRS @@ -219,7 +219,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): amendments=subcommand_args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", - pep_update=pep_update_dict, + sample_modifiers=sample_modifiers_dict, project_dict=PEPHubClient()._load_raw_pep( registry_path=subcommand_args.config_file ), diff --git a/looper/const.py b/looper/const.py index 9e39f506..bf40954d 100644 --- a/looper/const.py +++ b/looper/const.py @@ -94,7 +94,6 @@ "LOOPER_GENERIC_OUTPUT_SCHEMA", "LOOPER_GENERIC_COUNT_LINES", "PipelineLevel", - "PEP_UPDATE_KEY", ] FLAGS = ["completed", "running", "failed", "waiting", "partial"] @@ -223,7 +222,7 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): ALL_SUBCMD_KEY = "all" SAMPLE_PL_ARG = "sample_pipeline_interfaces" PROJECT_PL_ARG = "project_pipeline_interfaces" -PEP_UPDATE_KEY = "pep_update" + DEFAULT_CFG_PATH = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME) diff --git a/looper/project.py b/looper/project.py index 2e7f4c65..6daecac1 100644 --- a/looper/project.py +++ b/looper/project.py @@ -111,31 +111,7 @@ class Project(peppyProject): """ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): - - if kwargs.get("pep_update"): - - _LOGGER.warning( - "Pep update key supplied in looper config. Merging with PEP Project" - ) - - pep_update_dict = kwargs.get("pep_update") - temp_pep = peppyProject(cfg=cfg, amendments=amendments) - dict_obj_pep = temp_pep.to_dict(extended=True) # to make this a real PEP - - if SAMPLE_MODS_KEY in pep_update_dict: - dict_obj_pep.setdefault(SAMPLE_MODS_KEY, {}) - deep_update( - dict_obj_pep[SAMPLE_MODS_KEY], pep_update_dict[SAMPLE_MODS_KEY] - ) - if PROJ_MODS_KEY in pep_update_dict: - dict_obj_pep.setdefault(PROJ_MODS_KEY, {}) - deep_update(dict_obj_pep[PROJ_MODS_KEY], pep_update_dict[PROJ_MODS_KEY]) - - super(Project, self).from_dict(pep_dictionary=dict_obj_pep) - - else: - super(Project, self).__init__(cfg=cfg, amendments=amendments) - + super(Project, self).__init__(cfg=cfg, amendments=amendments) prj_dict = kwargs.get("project_dict") pep_config = kwargs.get("pep_config", None) if pep_config: @@ -156,17 +132,17 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): self.name = None # consolidate sample modifiers - # if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): - # _LOGGER.warning( - # "Sample modifiers were provided in Looper Config and in PEP Project Config. Merging..." - # ) - # deep_update(self.config["sample_modifiers"], kwargs.get(SAMPLE_MODS_KEY)) - # _LOGGER.debug( - # msg=f"Merged sample modifiers: {self.config['sample_modifiers']}" - # ) - # elif kwargs.get(SAMPLE_MODS_KEY): - # self.config.setdefault("sample_modifiers", {}) - # self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) + if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): + _LOGGER.warning( + "Sample modifiers were provided in Looper Config and in PEP Project Config. Merging..." + ) + deep_update(self.config["sample_modifiers"], kwargs.get(SAMPLE_MODS_KEY)) + _LOGGER.debug( + msg=f"Merged sample modifiers: {self.config['sample_modifiers']}" + ) + elif kwargs.get(SAMPLE_MODS_KEY): + self.config.setdefault("sample_modifiers", {}) + self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) # add sample pipeline interface to the project if kwargs.get(SAMPLE_PL_ARG): diff --git a/looper/utils.py b/looper/utils.py index af1d80c9..4eb8ca4b 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -603,8 +603,8 @@ def read_looper_config_file(looper_config_path: str) -> dict: if PIPESTAT_KEY in dp_data: return_dict[PIPESTAT_KEY] = dp_data[PIPESTAT_KEY] - if PEP_UPDATE_KEY in dp_data: - return_dict[PEP_UPDATE_KEY] = dp_data[PEP_UPDATE_KEY] + if SAMPLE_MODS_KEY in dp_data: + return_dict[SAMPLE_MODS_KEY] = dp_data[SAMPLE_MODS_KEY] if CLI_KEY in dp_data: return_dict[CLI_KEY] = dp_data[CLI_KEY] From cddb3be26218fd70aa6432b3b85ad90fb3ff85ae Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 25 Jun 2024 14:49:39 -0400 Subject: [PATCH 37/39] lint --- looper/const.py | 1 - 1 file changed, 1 deletion(-) diff --git a/looper/const.py b/looper/const.py index bf40954d..de080260 100644 --- a/looper/const.py +++ b/looper/const.py @@ -224,7 +224,6 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): PROJECT_PL_ARG = "project_pipeline_interfaces" - DEFAULT_CFG_PATH = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME) CLI_PROJ_ATTRS = [ OUTDIR_KEY, From 34f3db7c918d7aa9af2f036dcfe813c064731561 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 25 Jun 2024 14:54:11 -0400 Subject: [PATCH 38/39] remove modifying sample_modifiers --- looper/cli_pydantic.py | 7 +------ looper/project.py | 13 ------------- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 1da526e5..16bb6206 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -149,12 +149,9 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): looper_config_dict = read_looper_dotfile() _LOGGER.info(f"Using looper config ({looper_cfg_path}).") - sample_modifiers_dict = None cli_modifiers_dict = None for looper_config_key, looper_config_item in looper_config_dict.items(): - if looper_config_key == SAMPLE_MODS_KEY: - sample_modifiers_dict = looper_config_item - elif looper_config_key == CLI_KEY: + if looper_config_key == CLI_KEY: cli_modifiers_dict = looper_config_item else: setattr(subcommand_args, looper_config_key, looper_config_item) @@ -203,7 +200,6 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): amendments=subcommand_args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", - sample_modifiers=sample_modifiers_dict, **{ attr: getattr(subcommand_args, attr) for attr in CLI_PROJ_ATTRS @@ -219,7 +215,6 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): amendments=subcommand_args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", - sample_modifiers=sample_modifiers_dict, project_dict=PEPHubClient()._load_raw_pep( registry_path=subcommand_args.config_file ), diff --git a/looper/project.py b/looper/project.py index 6daecac1..7a652db7 100644 --- a/looper/project.py +++ b/looper/project.py @@ -131,19 +131,6 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): except NotImplementedError: self.name = None - # consolidate sample modifiers - if kwargs.get(SAMPLE_MODS_KEY) and self._modifier_exists(): - _LOGGER.warning( - "Sample modifiers were provided in Looper Config and in PEP Project Config. Merging..." - ) - deep_update(self.config["sample_modifiers"], kwargs.get(SAMPLE_MODS_KEY)) - _LOGGER.debug( - msg=f"Merged sample modifiers: {self.config['sample_modifiers']}" - ) - elif kwargs.get(SAMPLE_MODS_KEY): - self.config.setdefault("sample_modifiers", {}) - self.config["sample_modifiers"] = kwargs.get(SAMPLE_MODS_KEY) - # add sample pipeline interface to the project if kwargs.get(SAMPLE_PL_ARG): self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG)) From 497ffdc43d14c424fdca178644c37c596a76670e Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 25 Jun 2024 16:29:48 -0400 Subject: [PATCH 39/39] update changelog and version for 1.9.0 release --- docs/changelog.md | 6 ++++-- looper/_version.py | 2 +- looper/cli_pydantic.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index ce4c313e..3b619423 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -3,11 +3,13 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [2.0.0] -- 2024-07-xx +## [1.9.0] -- 2024-06-26 ### Added -- user can now add sample modifiers and cli modifiers to looper config instead of PEP project [#270](https://github.com/pepkit/looper/issues/270) +- user can now add cli modifiers to looper config instead of PEP project [#270](https://github.com/pepkit/looper/issues/270) - pipeline interfaces no longer must be nested under sample and project keys within looper config file [#465](https://github.com/pepkit/looper/issues/465) +- var_templates can now be hierarchical [#334](https://github.com/pepkit/looper/issues/334) +- looper can now gracefully halt spawned subprocesses when the user sends a keyboard interrupt [#37](https://github.com/pepkit/looper/issues/37) ## [1.8.1] -- 2024-06-06 diff --git a/looper/_version.py b/looper/_version.py index 2ce1f658..65d6992c 100644 --- a/looper/_version.py +++ b/looper/_version.py @@ -1,2 +1,2 @@ -__version__ = "1.8.1" +__version__ = "1.9.0" # You must change the version in parser = pydantic2_argparse.ArgumentParser in cli_pydantic.py!!! diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 16bb6206..c6cda827 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -336,7 +336,7 @@ def main(test_args=None) -> None: prog="looper", description="Looper: A job submitter for Portable Encapsulated Projects", add_help=True, - version="1.8.1", + version="1.9.0", ) parser = add_short_arguments(parser, ArgumentEnum)