From 2a1fa67450fbbc0d99d7c002379f316e6f30dd89 Mon Sep 17 00:00:00 2001 From: pavlemarinkovic Date: Thu, 28 Sep 2023 11:29:11 +0200 Subject: [PATCH 1/6] Can copy from one division to another --- sbpack/noncwl/copy_app.py | 92 +++++++++++++++++++++++++++++++-------- sbpack/noncwl/utils.py | 82 ++++++++++++++++++++++------------ 2 files changed, 128 insertions(+), 46 deletions(-) diff --git a/sbpack/noncwl/copy_app.py b/sbpack/noncwl/copy_app.py index fae5b0b..ee98f28 100644 --- a/sbpack/noncwl/copy_app.py +++ b/sbpack/noncwl/copy_app.py @@ -1,8 +1,7 @@ import argparse import logging import sbpack.lib as lib -# from sevenbridges.errors import NotFound -from sbpack.noncwl.utils import install_or_upgrade_app +from sbpack.noncwl.utils import install_or_upgrade_app, push_zip logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -11,34 +10,93 @@ def main(): # CLI parameters parser = argparse.ArgumentParser() - parser.add_argument("--profile", default="default", - help="SB platform profile as set in the SB API " - "credentials file.") - parser.add_argument("--appid", required=True, - help="What to copy? Takes the form {user or division}/{project}/{app_id} " - "or {user or division}/{project}/{app_id}/{revision_no}.") - parser.add_argument("--projectid", required=True, - help="Where to copy? Takes the form " - "{user or division}/{project}") + parser.add_argument( + "--profile", default="default", nargs="+", + help="SB platform profile as set in the SB API credentials file. If " + "you are using sbcopy to copy an app from one division to " + "another, please provide two profiles - first profile for the " + "source app (appid), and second for the destination project " + "(projectid)." + ) + parser.add_argument( + "--appid", required=True, + help="What to copy? Takes the form " + "{user or division}/{project}/{app_id} or " + "{user or division}/{project}/{app_id}/{revision_no}." + ) + parser.add_argument( + "--projectid", required=True, + help="Where to copy? Takes the form {user or division}/{project}" + ) args = parser.parse_args() # Preprocess CLI parameter values # Init api - api = lib.get_profile(args.profile) + if len(args.profile) > 1: + api_source = lib.get_profile(args.profile[0]) + api_dest = lib.get_profile(args.profile[1]) + else: + api_source = lib.get_profile(args.profile[0]) + api_dest = api_source # Source and destination apps - source_app = api.apps.get(args.appid) + source_app = api_source.apps.get(args.appid) sb_app_raw = source_app.raw destination_app_id = args.projectid + '/' + args.appid.split('/')[2] # Copy the code package - source_package = source_app.raw.get('app_content', {}).get('code_package', '') + source_package = source_app.raw.get( + 'app_content', {} + ).get('code_package', '') + if source_package: - new_file = api.files.get(api.files.get(source_package).copy(project=args.projectid)) - sb_app_raw['app_content']['code_package'] = new_file.id + # The app_content.code_package field exists and contains the id of the + # code package file + + source_package_file = api_source.files.get(source_package) + if api_source == api_dest: + # Copy has been performed in the same division/env + # Copy the file to the destination through the API + new_file_id = api_source.files.get( + source_package_file.copy(project=args.projectid) + ).id + else: + # Copy has been performed between two different divisions/envs + # Download the file + name = source_package_file.name + source_package_file.download( + path=name, + overwrite=True + ) + + # Find out if the parent folder is the root of the project + parent = source_package_file.parent + project = api_source.projects.get(source_package_file.project) + if parent == project.root_folder: + # If the parent is the root, then set folder name to None + # This means that the code package will go into the root of the + # destination project + folder_name = None + else: + # Parent is not the root, so use it when pushing the zip file + # to preserve the folder structure + folder_name = api_source.files.get(parent).name + + # Push the zip to the destination project + new_file_id = push_zip( + api=api_dest, + zip_path=name, + project_id=args.projectid, + folder_name=folder_name + ) + # With this complete the code package is now at the destination + + # Change the id of the code package to the new file + sb_app_raw['app_content']['code_package'] = new_file_id - install_or_upgrade_app(api,destination_app_id, sb_app_raw) + # Use the install_or_upgrade_app function to copy the app + install_or_upgrade_app(api_dest, destination_app_id, sb_app_raw) if __name__ == "__main__": diff --git a/sbpack/noncwl/utils.py b/sbpack/noncwl/utils.py index 9cee09d..f4ee6f2 100644 --- a/sbpack/noncwl/utils.py +++ b/sbpack/noncwl/utils.py @@ -191,16 +191,26 @@ def zip_and_push_to_sb(api, workflow_path, project_id, folder_name): Create .zip package file. Upload .zip file to the designated folder for packages on SevenBridges Platform. Delete local .zip file. """ + zip_path = zip_directory(workflow_path) + return push_zip(api, zip_path, project_id, folder_name) - # This will create a temporary directory that will store all files from the - # original directory, except for the .git hidden directory. This dir - # sometimes collects a large amount of files that will not be used by the - # tool, and can increase the size of the archive up to 10 times. - source_path = os.path.abspath(workflow_path) - destination_path = source_path + '_' + time.strftime("%Y%m%d-%H%M%S") - zip_path = destination_path + '.zip' - os.mkdir(destination_path) +def update_timestamp(file_name): + return re.sub( + r"(?:\.|)_\d{8}-\d{6}$", "", file_name + ) + f'_{time.strftime("%Y%m%d-%H%M%S")}' + + +def zip_directory(workflow_path): + """ + This will create a temporary directory that will store all files from the + original directory, except for the .git hidden directory. This dir + sometimes collects a large amount of files that will not be used by the + tool, and can increase the size of the archive up to 10 times. + """ + + intermediary_dir = update_timestamp(os.path.abspath(workflow_path)) + os.mkdir(intermediary_dir) for root, dirs, files in os.walk(workflow_path): pattern = re.compile(r'(?:^|.*/)\.git(?:$|/.*)') @@ -210,45 +220,62 @@ def zip_and_push_to_sb(api, workflow_path, project_id, folder_name): dirs = [d for d in dirs if not re.match(pattern, d)] for d in dirs: source_file = os.path.join(root, d) - directory_path = os.path.join(destination_path, os.path.relpath( + directory_path = os.path.join(intermediary_dir, os.path.relpath( source_file, workflow_path)) if not os.path.exists(directory_path): os.mkdir(directory_path) for file in files: source_file = os.path.join(root, file) - dest_file = os.path.join(destination_path, os.path.relpath( + dest_file = os.path.join(intermediary_dir, os.path.relpath( source_file, workflow_path)) shutil.copy2(source_file, dest_file) shutil.make_archive( - destination_path, + intermediary_dir, 'zip', - root_dir=destination_path, + root_dir=intermediary_dir, base_dir='./' ) + shutil.rmtree(intermediary_dir) + print(f'Temporary local folder {intermediary_dir} deleted.') + + return intermediary_dir + '.zip' + + +def push_zip(api, zip_path, project_id, folder_name=None): if os.path.getsize(zip_path) > PACKAGE_SIZE_LIMIT: logger.error(f"File size too big: {os.path.getsize(zip_path)}") raise FileExistsError # Add the right error - folder_found = list(api.files.query( - project=project_id, - names=[folder_name], - ).all()) - - if not folder_found: - folder_created = api.files.create_folder( - project=api.projects.get(project_id), - name=folder_name - ) - folder_id = folder_created.id - else: - folder_id = folder_found[0].id + folder_id = None + if folder_name: + # check if the folder already exists + folder_found = list(api.files.query( + project=project_id, + names=[folder_name], + ).all()) + + if folder_found: + folder_id = folder_found[0].id + else: + # if the folder does not exist, make it + folder_created = api.files.create_folder( + project=api.projects.get(project_id), + name=folder_name + ) + folder_id = folder_created.id print(f'Uploading file {zip_path}, ' f'please wait for the upload to complete.') - u = api.files.upload(zip_path, parent=folder_id, overwrite=False) + + u = api.files.upload( + zip_path, + parent=folder_id, + project=project_id if not folder_id else None, + overwrite=False + ) uploaded_file_id = u.result().id print(f'Upload complete!') @@ -256,9 +283,6 @@ def zip_and_push_to_sb(api, workflow_path, project_id, folder_name): os.remove(zip_path) print(f'Temporary local file {zip_path} deleted.') - shutil.rmtree(destination_path) - print(f'Temporary local folder {destination_path} deleted.') - return uploaded_file_id From 434b7a56b592342545869d9ccb402e3a30353d1c Mon Sep 17 00:00:00 2001 From: pavlemarinkovic Date: Thu, 28 Sep 2023 14:37:01 +0200 Subject: [PATCH 2/6] Can copy from one division to another --- sbpack/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbpack/version.py b/sbpack/version.py index 3a6d6bd..ca521aa 100644 --- a/sbpack/version.py +++ b/sbpack/version.py @@ -1 +1 @@ -__version__ = "2023.06.20rc4" +__version__ = "2023.06.20rc5" From 82be8182c34d90c8cc09910b420d14ff6905e0fc Mon Sep 17 00:00:00 2001 From: pavlemarinkovic Date: Thu, 9 Nov 2023 14:56:56 +0100 Subject: [PATCH 3/6] Move constants from utils.py and nextflow.py to the constants.py Remove manual validation Remove input and output schema files as separate inputs Fix reports not adding proper glob patterns Add execution mode --- sbpack/noncwl/constants.py | 101 +++++++++++++++ sbpack/noncwl/nextflow.py | 259 +++++++++++++------------------------ sbpack/noncwl/utils.py | 181 ++++++++++---------------- sbpack/noncwl/wdl.py | 75 ++++++----- sbpack/version.py | 2 +- 5 files changed, 302 insertions(+), 316 deletions(-) create mode 100644 sbpack/noncwl/constants.py diff --git a/sbpack/noncwl/constants.py b/sbpack/noncwl/constants.py new file mode 100644 index 0000000..d870282 --- /dev/null +++ b/sbpack/noncwl/constants.py @@ -0,0 +1,101 @@ +from enum import Enum + +# ############################## Generic Bits ############################### # +PACKAGE_SIZE_LIMIT = 100 * 1024 * 1024 # 100 MB + + +# keep track of what extensions are applicable for processing +class EXTENSIONS: + yaml = 'yaml' + yml = 'yml' + json = 'json' + cwl = 'cwl' + + yaml_all = [yaml, yml, cwl] + json_all = [json, cwl] + all_ = [yaml, yml, json, cwl] + + +# ############################ CWL Standard Bits ############################ # +# A generic SB input array of files that should be available on the +# instance but are not explicitly provided to the execution as wdl params. +GENERIC_FILE_ARRAY_INPUT = { + "id": "auxiliary_files", + "type": "File[]?", + "label": "Auxiliary files", + "doc": "List of files not added as explicit workflow inputs but " + "required for workflow execution." +} + +GENERIC_NF_OUTPUT_DIRECTORY = { + "id": "nf_workdir", + "type": "Directory?", + "label": "Work Directory", + "doc": "This is a template output. " + "Please change glob to directories specified in " + "publishDir in the workflow.", + "outputBinding": { + "glob": "work" + } +} + +GENERIC_WDL_OUTPUT_DIRECTORY = { + "id": "output_txt", + "doc": "This is a template output. " + "Please modify to collect final outputs using " + "glob inside the working directory.", + "type": "File[]", + "outputBinding": { + "glob": "*.txt" + } +} + +# Requirements to be added to sb wrapper +WRAPPER_REQUIREMENTS = [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "InitialWorkDirRequirement", + "listing": [ + "$(inputs.auxiliary_files)" + ] + } +] + +# ############################## Nextflow Bits ############################## # +# Keys that should be skipped when parsing nextflow tower yaml file + +# Mappings of nextflow input fields to SB input fields +# nextflow_key: cwl_key mapping +NF_TO_CWL_PORT_MAP = { + 'default': 'sbg:toolDefaultValue', + 'description': 'label', + 'help_text': 'doc', + 'mimetype': 'format', + 'fa_icon': 'sbg:icon', + 'pattern': 'sbg:pattern', + 'hidden': 'sbg:hidden', +} + +# Mappings of nextflow definition fields to SB category fields +# nextflow_key: cwl_key mapping +NF_TO_CWL_CATEGORY_MAP = { + 'title': 'sbg:title', + 'description': 'sbg:doc', + 'fa_icon': 'sbg:icon', +} + +# What keys to skip from the tower.yml file +SKIP_NEXTFLOW_TOWER_KEYS = [ + 'tower', + 'mail', +] + + +class ExecMode(Enum): + single = 'single-instance' + multi = 'multi-instance' + + def __str__(self): + return self.value diff --git a/sbpack/noncwl/nextflow.py b/sbpack/noncwl/nextflow.py index f019caf..6d46c47 100644 --- a/sbpack/noncwl/nextflow.py +++ b/sbpack/noncwl/nextflow.py @@ -20,13 +20,16 @@ create_profile_enum, update_schema_code_package, install_or_upgrade_app, - validate_inputs, - nf_schema_type_mapper, + nf_to_sb_input_mapper, +) +from sbpack.noncwl.constants import ( + ExecMode, GENERIC_FILE_ARRAY_INPUT, - GENERIC_OUTPUT_DIRECTORY, + GENERIC_NF_OUTPUT_DIRECTORY, WRAPPER_REQUIREMENTS, SKIP_NEXTFLOW_TOWER_KEYS, EXTENSIONS, + NF_TO_CWL_CATEGORY_MAP, ) logger = logging.getLogger(__name__) @@ -46,90 +49,7 @@ def __init__(self, workflow_path, dump_schema=False, sb_doc=None): self.nf_config_files = None self.sb_doc = sb_doc self.executor_version = None - self.output_schemas = None - self.input_schemas = None - - @staticmethod - def nf_cwl_port_map(): - """ - Mappings of nextflow input fields to SB input fields - nextflow_key: cwl_key mapping - """ - return { - 'default': 'sbg:toolDefaultValue', - 'description': 'label', - 'help_text': 'doc', - 'mimetype': 'format', - 'fa_icon': 'sbg:icon', - 'pattern': 'sbg:pattern', - 'hidden': 'sbg:hidden', - } - - @staticmethod - def nf_cwl_category_map(): - """ - Mappings of nextflow definition fields to SB category fields - nextflow_key: cwl_key mapping - """ - return { - 'title': 'sbg:title', - 'description': 'sbg:doc', - 'fa_icon': 'sbg:icon', - } - - def nf_to_sb_input_mapper(self, port_id, port_data, category=None): - """ - Convert a single input from Nextflow schema to SB schema - """ - sb_input = dict() - sb_input['id'] = port_id - sb_input['type'] = nf_schema_type_mapper(port_data) - sb_input['type'].append('null') - if category: - sb_input['sbg:category'] = category - for nf_field, sb_field in self.nf_cwl_port_map().items(): - if nf_field in port_data: - value = port_data[nf_field] - if value == ":" and nf_field == 'default': - # Bug prevents running a task if an input's - # default value is exactly ":" - value = " :" - sb_input[sb_field] = value - - sb_input['inputBinding'] = { - 'prefix': f'--{port_id}', - } - return sb_input - - def collect_nf_definition_properties(self, definition): - """ - Nextflow inputs schema contains multiple definitions where each - definition contains multiple properties - """ - cwl_inputs = list() - sb_category = dict() - - for nf_field, sb_field in self.nf_cwl_category_map().items(): - if nf_field in definition: - sb_category[sb_field] = definition[nf_field] - - input_category = 'Inputs' - if 'title' in definition: - input_category = sb_category['sbg:title'] - - for port_id, port_data in definition['properties'].items(): - cwl_inputs.append(self.nf_to_sb_input_mapper( - port_id, - port_data, - category=input_category, - )) - # Nextflow schema field "required" lists input_ids - # for required inputs. - # Reason we are not using definition.get('required', []) any longer - # is that some inputs can be contained in the profile. This means - # that they do not have to be provided explicitly through the - # command line. - return cwl_inputs, sb_category + self.execution_mode = None def nf_schema_build(self): """ @@ -154,7 +74,10 @@ def nf_schema_build(self): return nf_schema_path @staticmethod - def file_is_nf_schema(path): + def file_is_nf_schema(path: str) -> bool: + """ + Validation if the provided file is an NF schema file + """ try: schema = yaml.safe_load(path) if 'definitions' not in schema: @@ -170,51 +93,68 @@ def file_is_nf_schema(path): logger.info(f"File {path} is not an nf schema file (due to {e})") return False - def generate_sb_inputs(self, manual_validation=False): + def generate_sb_inputs(self): """ Generate SB inputs schema """ cwl_inputs = list() - if self.input_schemas: - nf_schemas = [ - f for f in self.input_schemas if self.file_is_nf_schema(f) - ] - if nf_schemas: - self.nf_schema_path = nf_schemas.pop().name + # ## Add profiles to the input ## # + self.nf_config_files = get_config_files(self.workflow_path) + + profiles = dict() + + for path in self.nf_config_files: + profiles.update(parse_config_file(path)) + + profiles_choices = sorted(list(set(profiles.keys()))) + if profiles: + cwl_inputs.append(create_profile_enum(profiles_choices)) + + # Optional inputs due to profiles + # optional_inputs = [] + # for profile_id, profile_contents in profiles.items(): + # for key in profile_contents.keys(): + # if 'params.' in key: + # input_ = key.rsplit('params.', 0) + # optional_inputs.extend(input_) + # optional_inputs = set(optional_inputs) + + # ## Add inputs ## # if self.nf_schema_path: with open(self.nf_schema_path, 'r') as f: nf_schema = yaml.safe_load(f) for p_key, p_value in nf_schema.get('properties', {}).items(): cwl_inputs.append( - self.nf_to_sb_input_mapper(p_key, p_value)) + nf_to_sb_input_mapper(p_key, p_value)) for def_name, definition in nf_schema.get( 'definitions', {}).items(): - inputs, category = self.collect_nf_definition_properties( - definition) - cwl_inputs.extend(inputs) - # add category to schema - - if self.input_schemas: - for file in self.input_schemas: - if file.name == self.nf_schema_path: - continue - if file.name.split('.').pop().lower() in \ - EXTENSIONS.all_: - cwl_inputs.extend(self.parse_cwl(file, 'inputs')) - - # Add profiles to the input - self.nf_config_files = get_config_files(self.workflow_path) - profiles = [] - for path in self.nf_config_files: - profiles.extend(list(parse_config_file(path).keys())) - - profiles = sorted(list(set(profiles))) - - if profiles: - cwl_inputs.append(create_profile_enum(profiles)) + # Nextflow inputs schema contains multiple definitions where + # each definition contains multiple properties + category = dict() + + for nf_field, sb_field in NF_TO_CWL_CATEGORY_MAP.items(): + if nf_field in definition: + category[sb_field] = definition[nf_field] + + input_category = 'Inputs' + if 'title' in definition: + input_category = category['sbg:title'] + + for port_id, port_data in definition['properties'].items(): + req = False + # if port_id in definition.get('required', []) and \ + # port_id not in optional_inputs: + # req = True + + cwl_inputs.append(nf_to_sb_input_mapper( + port_id, + port_data, + category=input_category, + required=req, + )) # Add the generic file array input - auxiliary files cwl_inputs.append(GENERIC_FILE_ARRAY_INPUT) @@ -231,10 +171,6 @@ def generate_sb_inputs(self, manual_validation=False): input_ids.add(id_) inp['id'] = id_ - if manual_validation: - print('Input validation') - cwl_inputs = validate_inputs(cwl_inputs) - print('Input validation completed') return cwl_inputs def generate_sb_outputs(self): @@ -244,17 +180,16 @@ def generate_sb_outputs(self): output_ids = set() cwl_outputs = list() - if self.output_schemas: - for file in self.output_schemas: - if file.name.split('.').pop().lower() in EXTENSIONS.yaml_all: - cwl_outputs.extend(self.parse_output_yml(file)) - if file.name.split('.').pop().lower() in EXTENSIONS.json_all: - cwl_outputs.extend(self.parse_cwl(file, 'outputs')) + if get_tower_yml(self.workflow_path): + cwl_outputs.extend( + self.parse_output_yml( + open(get_tower_yml(self.workflow_path))) + ) # if the only output is reports, or there are no outputs, add generic if len(cwl_outputs) == 0 or \ (len(cwl_outputs) == 1 and cwl_outputs[0]['id'] == 'reports'): - cwl_outputs.append(GENERIC_OUTPUT_DIRECTORY) + cwl_outputs.append(GENERIC_NF_OUTPUT_DIRECTORY) for output in cwl_outputs: base_id = output['id'] @@ -372,9 +307,14 @@ def parse_output_yml(self, yml_file): # Tower yml file can use "tower" key in the yml file to designate # some configurations tower uses. Since these are not output # definitions, we skip these. - if key in SKIP_NEXTFLOW_TOWER_KEYS and \ - yml_file == 'tower.yml': + if key in SKIP_NEXTFLOW_TOWER_KEYS: continue + if key == "reports" and type(value) is dict: + temp = value.copy() + for k, v in temp.items(): + value[f"work/**/**/{k}"] = v + del value[k] + outputs.append( self.make_output_type(key, value) ) @@ -418,22 +358,12 @@ def dump_sb_wrapper(self, out_format=EXTENSIONS.yaml): def generate_sb_app( self, sb_schema=None, sb_entrypoint='main.nf', - executor_version=None, output_schemas=None, input_schemas=None, - manual_validation=False + executor_version=None, execution_mode=None ): # default nextflow entrypoint """ Generate an SB app for a nextflow workflow, OR edit the one created and defined by the user """ - if output_schemas: - self.output_schemas = output_schemas - if get_tower_yml(self.workflow_path): - if not self.output_schemas: - self.output_schemas = [] - self.output_schemas.append(open(get_tower_yml(self.workflow_path))) - - if input_schemas: - self.input_schemas = input_schemas if sb_schema: new_code_package = self.sb_package_id if \ @@ -447,9 +377,7 @@ def generate_sb_app( self.sb_wrapper['cwlVersion'] = 'None' self.sb_wrapper['class'] = 'nextflow' - self.sb_wrapper['inputs'] = self.generate_sb_inputs( - manual_validation - ) + self.sb_wrapper['inputs'] = self.generate_sb_inputs() self.sb_wrapper['outputs'] = self.generate_sb_outputs() self.sb_wrapper['requirements'] = WRAPPER_REQUIREMENTS @@ -464,6 +392,11 @@ def generate_sb_app( self.sb_wrapper['app_content'] = app_content + if execution_mode or self.execution_mode: + self.sb_wrapper['hints'] = [ + {'NextflowExecutionMode': execution_mode} + ] + if self.sb_doc: self.sb_wrapper['doc'] = self.sb_doc elif get_readme(self.workflow_path): @@ -516,6 +449,11 @@ def main(): "--executor-version", required=False, help="Version of the Nextflow executor to be used with the app.", ) + parser.add_argument( + "--execution-mode", type=ExecMode, choices=list(ExecMode), + required=False, default=ExecMode.single, + help="Execution mode for your application.", + ) parser.add_argument( "--json", action="store_true", required=False, help="Dump sb app schema in JSON format (YAML by default)", @@ -525,29 +463,12 @@ def main(): help="Do not create new schema, use this schema file. " "It is sb_nextflow_schema in JSON or YAML format.", ) - parser.add_argument( - "--output-schema-files", required=False, - default=None, type=argparse.FileType('r'), nargs='+', - help="Additional output schema files in CWL or tower.yml format.", - ) - parser.add_argument( - "--input-schema-files", required=False, - default=None, type=argparse.FileType('r'), nargs='+', - help="Additional input schema files in CWL format.", - ) parser.add_argument( "--revision-note", required=False, default=None, type=str, help="Revision note to be placed in the CWL schema if the app is " "uploaded to the sbg platform.", ) - parser.add_argument( - "--manual-validation", required=False, action="store_true", - default=False, - help="You will have to provide validation for all 'string' type inputs" - " if are string (str), file (file), directory (dir), list of file" - " (files), or list of directory (dirs) type inputs.", - ) args = parser.parse_args() @@ -582,7 +503,7 @@ def main(): sb_entrypoint=entrypoint, sb_schema=args.sb_schema, executor_version=args.executor_version, - manual_validation=args.manual_validation + execution_mode=args.execution_mode.value, ) else: @@ -598,18 +519,14 @@ def main(): folder_name='nextflow_workflows' ) - # Build or update nextflow inputs schema - if not args.input_schema_files: - nf_schema_path = nf_wrapper.nf_schema_build() - nf_wrapper.nf_schema_path = nf_schema_path + nf_schema_path = nf_wrapper.nf_schema_build() + nf_wrapper.nf_schema_path = nf_schema_path # Create app sb_app = nf_wrapper.generate_sb_app( sb_entrypoint=entrypoint, executor_version=args.executor_version, - output_schemas=args.output_schema_files, - input_schemas=args.input_schema_files, - manual_validation=args.manual_validation + execution_mode=args.execution_mode.value, ) # Dump app to local file out_format = EXTENSIONS.json if args.json else EXTENSIONS.yaml diff --git a/sbpack/noncwl/utils.py b/sbpack/noncwl/utils.py index f4ee6f2..35f278e 100644 --- a/sbpack/noncwl/utils.py +++ b/sbpack/noncwl/utils.py @@ -8,66 +8,15 @@ from sbpack.pack import pack from sevenbridges.errors import NotFound +from sbpack.noncwl.constants import ( + PACKAGE_SIZE_LIMIT, + EXTENSIONS, + NF_TO_CWL_PORT_MAP, +) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -PACKAGE_SIZE_LIMIT = 256 * 1024 * 1024 - 1 # ~256 MB - -# A generic SB input array of files that should be available on the -# instance but are not explicitly provided to the execution as wdl params. -GENERIC_FILE_ARRAY_INPUT = { - "id": "auxiliary_files", - "type": "File[]?", - "label": "Auxiliary files", - "doc": "List of files not added as explicit workflow inputs but " - "required for workflow execution." -} - -GENERIC_OUTPUT_DIRECTORY = { - "id": "nf_workdir", - "type": "Directory?", - "label": "Work Directory", - "doc": "This is a template output. " - "Please change glob to directories specified in " - "publishDir in the workflow.", - "outputBinding": { - "glob": "work" - } -} - -# Requirements to be added to sb wrapper -WRAPPER_REQUIREMENTS = [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "InitialWorkDirRequirement", - "listing": [ - "$(inputs.auxiliary_files)" - ] - } -] - - -# Keys that should be skipped when parsing nextflow tower yaml file -SKIP_NEXTFLOW_TOWER_KEYS = [ - 'tower', - 'mail', -] - - -# keep track of what extensions are applicable for processing -class EXTENSIONS: - yaml = 'yaml' - yml = 'yml' - json = 'json' - cwl = 'cwl' - - yaml_all = [yaml, yml, cwl] - json_all = [json, cwl] - all_ = [yaml, yml, json, cwl] - def nf_schema_type_mapper(input_type_string): """ @@ -79,6 +28,36 @@ def nf_schema_type_mapper(input_type_string): return type_mapper(type_, format_) +def nf_to_sb_input_mapper(port_id, port_data, category=None, required=False): + """ + Convert a single input from Nextflow schema to SB schema + """ + sb_input = dict() + sb_input['id'] = port_id + sb_input['type'] = nf_schema_type_mapper(port_data) + sb_input['inputBinding'] = { + 'prefix': f'--{port_id}', + } + + if not required: + sb_input['type'].append('null') + + if category: + sb_input['sbg:category'] = category + + for nf_field, sb_field in NF_TO_CWL_PORT_MAP.items(): + if nf_field in port_data: + value = port_data[nf_field] + if value == ":" and nf_field == 'default': + # Bug prevents running a task if an input's + # default value is exactly ":". This bug will likely be + # fixed at the time of release of this version. + value = " :" + sb_input[sb_field] = value + + return sb_input + + def type_mapper(type_, format_): if isinstance(type_, str): if type_ == 'string' and 'path' in format_: @@ -138,35 +117,6 @@ def create_profile_enum(profiles: list): } -def validate_inputs(inputs): - types = { - 'str': 'string?', - 'file': 'File?', - 'dir': 'Directory?', - 'files': 'File[]?', - 'dirs': 'Directory[]?' - } - exit_codes = ['e', 'exit', 'quit', 'q'] - - for input_ in inputs: - if 'string' in input_['type']: - new_type = input(f'What input type is "{input_["id"]}"?\n') - while new_type.lower() not in \ - list(types.keys()) + exit_codes: - print( - f'{new_type} is not a valid input. Please use the ' - f'following notation:') - for key, val in types.items(): - print(f"\t{key}: {val}") - new_type = input() - if new_type in exit_codes: - break - - nt = types[new_type] - input_['type'] = nt - return inputs - - def get_dict_depth(dict_, level=0): """ Find the depth of the dictionary. Example: @@ -340,45 +290,56 @@ def get_config_files(path): return paths or None -def parse_config_file(file_path): - profiles_text = "" +def find_config_section(file_path: str, section: str) -> str: + section_text = "" + found_section = False + brackets = 0 with open(file_path, 'r') as file: - config = file.read() - - # Trace - # trace_pattern = re.compile(r"trace\s\{.*}", re.MULTILINE | re.DOTALL) - # if re.findall(trace_pattern, config): - # logger.warning("Detected `trace` in nextflow config. This " - # "functionality is currently not supported.") - found_profiles = False - brackets = 0 - - for line in config.split("\n"): - if found_profiles: - profiles_text += line + for line in file.readlines(): + if found_section: + section_text += line brackets += line.count("{") - line.count("}") if brackets < 0: break - if re.findall(r'profiles\s+\{', line): - profiles_text += "{\n" - found_profiles = True + if re.findall(section + r'\s+\{', line): + section_text += "{\n" + found_section = True + + return section_text + + +def parse_config_file(file_path: str) -> dict: + profiles_text = find_config_section(file_path, 'profiles') # Extract profiles using regex profiles = {} - pattern = re.compile(r'\s*(\w+)\s*{([^}]+)}', re.MULTILINE | re.DOTALL) - blocks = re.findall(pattern, profiles_text) + block_pattern = re.compile( + r'\s*(\w+)\s*{([^}]+)}', re.MULTILINE | re.DOTALL + ) + key_val_pattern = re.compile( + r'([a-zA-Z._]+)(?:\s+|)=(?:\s+|)([^\s]+)' + ) + include_pattern = re.compile( + r'includeConfig\s+[\'\"]([a-zA-Z_.\\/]+)[\'\"]' + ) + + blocks = re.findall(block_pattern, profiles_text) for name, content in blocks: - settings = dict( - re.findall(r'([a-zA-Z._]+)(?:\s+|)=(?:\s+|)([^\s]+)', content) - ) + settings = dict(re.findall(key_val_pattern, content)) profiles[name] = settings - include_path = re.findall( - r'includeConfig\s+[\'\"]([a-zA-Z_.\\/]+)[\'\"]', content) + include_path = re.findall(include_pattern, content) if include_path: profiles[name]['includeConfig'] = include_path + include_path = include_path.pop() + additional_path = os.path.join( + os.path.dirname(file_path), include_path) + params_text = find_config_section(additional_path, 'params') + params = dict(re.findall(key_val_pattern, params_text)) + for param, val in params.items(): + profiles[name][f"params.{param}"] = val # return currently returns includeConfig and settings, which are not used # but could be used in the future versions of sbpack diff --git a/sbpack/noncwl/wdl.py b/sbpack/noncwl/wdl.py index ac047ec..43bea6c 100644 --- a/sbpack/noncwl/wdl.py +++ b/sbpack/noncwl/wdl.py @@ -4,17 +4,25 @@ import sbpack.lib as lib import argparse import logging -from sevenbridges.errors import NotFound from sbpack.version import __version__ import re from subprocess import check_call -from sbpack.noncwl.utils import (zip_and_push_to_sb, get_readme, install_or_upgrade_app, - update_schema_code_package, GENERIC_FILE_ARRAY_INPUT, WRAPPER_REQUIREMENTS) +from sbpack.noncwl.utils import ( + zip_and_push_to_sb, + get_readme, + install_or_upgrade_app, + update_schema_code_package, +) + +from sbpack.noncwl.constants import ( + GENERIC_FILE_ARRAY_INPUT, + WRAPPER_REQUIREMENTS, + GENERIC_WDL_OUTPUT_DIRECTORY, +) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -PACKAGE_SIZE_LIMIT = 100 * 1024 * 1024 # MB JAVA_EXE = os.getenv('SBPACK_WDL_JAVA_EXE', 'java') @@ -30,26 +38,9 @@ def __init__(self, workflow_path, entrypoint, dump_schema=False, self.dump_schema = dump_schema self.sb_doc = sb_doc - @staticmethod - def default_wdl_sb_outputs(): - """ - Default output for a WDL execution - """ - return [ - { - "id": "output_txt", - "doc": "This is a template output." - " Please modify to collect final outputs using glob inside the working directory.", - "type": "File[]", - "outputBinding": { - "glob": "*.txt" - } - } - ] - @staticmethod def parse_type(type_string): - t, _, attribute_string = re.search('([^\(]*)(\((.*)\))?', + t, _, attribute_string = re.search(r'([^(]*)(\((.*)\))?', type_string).groups() attribute_string = attribute_string \ if attribute_string is not None else '' @@ -87,8 +78,15 @@ def generate_sb_inputs(self): if not womtool_inputs: wdl_path = f'{self.workflow_path}/{self.entrypoint}' - java_cmd = f"{JAVA_EXE} -jar {self.womtool_path} inputs {wdl_path} " \ - f"> {self.workflow_path}/womtool_inputs.json" + java_cmd = " ".join([ + str(JAVA_EXE), + '-jar', + str(self.womtool_path), + 'inputs', + str(wdl_path), + ">", + f"{self.workflow_path}/womtool_inputs.json" + ]) call_java_womtool = check_call(java_cmd, shell=True) with open(f"{self.workflow_path}/womtool_inputs.json", 'r') as f: womtool_inputs = json.load(f) @@ -96,8 +94,10 @@ def generate_sb_inputs(self): cwl_inputs = list() for key, value in womtool_inputs.items(): typ, descr = self.parse_type(value) - if bool(re.search('\[(.*?)\]', typ)): - typ_temp = self.womtool_type_mapper(re.search('\[(.*?)\]', typ).groups()[0]) + '[]' + if bool(re.search(r'\[(.*?)]', typ)): + typ_temp = self.womtool_type_mapper( + re.search(r'\[(.*?)]', typ).groups()[0] + ) + '[]' if '?' in typ: typ = typ_temp + '?' else: @@ -121,7 +121,9 @@ def dump_sb_wrapper(self, out_format='yaml'): """ Dump SB wrapper for WDL workflow to a file """ - sb_wrapper_path = os.path.join(self.workflow_path, f'sb_wdl_schema.{out_format}') + sb_wrapper_path = os.path.join( + self.workflow_path, f'sb_wdl_schema.{out_format}' + ) if out_format == 'yaml': with open(sb_wrapper_path, 'w') as f: yaml.dump(self.sb_wrapper, f, indent=4, sort_keys=True) @@ -131,11 +133,13 @@ def dump_sb_wrapper(self, out_format='yaml'): def generate_sb_app(self, sb_entrypoint, sb_schema=None): """ - Generate a raw SB app for a WDL workflow, or use provided sb_schema file + Generate a raw SB app for a WDL workflow, + or use provided sb_schema file """ if sb_schema: - new_code_package = self.sb_package_id if self.sb_package_id else None + new_code_package = self.sb_package_id if \ + self.sb_package_id else None schema_ext = sb_schema.split('/')[-1].split('.')[-1] return update_schema_code_package(sb_schema, schema_ext, @@ -145,7 +149,7 @@ def generate_sb_app(self, sb_entrypoint, sb_schema=None): self.sb_wrapper['cwlVersion'] = 'None' self.sb_wrapper['class'] = 'wdl' self.sb_wrapper['inputs'] = self.generate_sb_inputs() - self.sb_wrapper['outputs'] = self.default_wdl_sb_outputs() + self.sb_wrapper['outputs'] = [GENERIC_WDL_OUTPUT_DIRECTORY] self.sb_wrapper['requirements'] = WRAPPER_REQUIREMENTS @@ -185,8 +189,8 @@ def main(): help="Path to inputs .JSON generated by womtool", required=False) parser.add_argument("--sb-doc", - help="""Path to a doc file for sb app. - If not provided, README.md will be used if available""", + help="Path to a doc file for sb app. " + "If not provided, README.md will be used if available", required=False) parser.add_argument("--dump-sb-app", action="store_true", required=False, @@ -223,8 +227,10 @@ def main(): else: wom_path = None if not args.womtool_input and not args.womtool_path and not args.sb_schema: - raise SystemExit(f"Please specify either --womtool-path, --sb-schema or " - f"--womtools-input") + raise SystemExit( + f"Please specify either --womtool-path, --sb-schema or " + f"--womtools-input" + ) # Init api and wdl_wrapper api = lib.get_profile(args.profile) @@ -279,5 +285,6 @@ def main(): install_or_upgrade_app(api, args.appid, sb_app) + if __name__ == "__main__": main() diff --git a/sbpack/version.py b/sbpack/version.py index ca521aa..becb052 100644 --- a/sbpack/version.py +++ b/sbpack/version.py @@ -1 +1 @@ -__version__ = "2023.06.20rc5" +__version__ = "2023.11.9rc1" From 77f429c187a54c7477fefb4b5d49122537165f6e Mon Sep 17 00:00:00 2001 From: pavlemarinkovic Date: Thu, 9 Nov 2023 15:11:04 +0100 Subject: [PATCH 4/6] Fix wrapper hints --- sbpack/noncwl/nextflow.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sbpack/noncwl/nextflow.py b/sbpack/noncwl/nextflow.py index 6d46c47..57842a0 100644 --- a/sbpack/noncwl/nextflow.py +++ b/sbpack/noncwl/nextflow.py @@ -393,9 +393,15 @@ def generate_sb_app( self.sb_wrapper['app_content'] = app_content if execution_mode or self.execution_mode: - self.sb_wrapper['hints'] = [ - {'NextflowExecutionMode': execution_mode} - ] + if 'hints' not in self.sb_wrapper: + self.sb_wrapper['hints'] = [] + + self.sb_wrapper['hints'].append( + { + 'class': 'sbg:NextflowExecutionMode', + 'value': execution_mode.value + } + ) if self.sb_doc: self.sb_wrapper['doc'] = self.sb_doc @@ -451,7 +457,7 @@ def main(): ) parser.add_argument( "--execution-mode", type=ExecMode, choices=list(ExecMode), - required=False, default=ExecMode.single, + required=False, default=None, help="Execution mode for your application.", ) parser.add_argument( @@ -526,7 +532,7 @@ def main(): sb_app = nf_wrapper.generate_sb_app( sb_entrypoint=entrypoint, executor_version=args.executor_version, - execution_mode=args.execution_mode.value, + execution_mode=args.execution_mode, ) # Dump app to local file out_format = EXTENSIONS.json if args.json else EXTENSIONS.yaml From c1a5185c30fff904ddf9c5140477448b1f8dc555 Mon Sep 17 00:00:00 2001 From: pavlemarinkovic Date: Thu, 9 Nov 2023 16:30:49 +0100 Subject: [PATCH 5/6] Update Readme.md --- sbpack/noncwl/Readme.md | 80 +++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/sbpack/noncwl/Readme.md b/sbpack/noncwl/Readme.md index 72ceabc..2e36a42 100644 --- a/sbpack/noncwl/Readme.md +++ b/sbpack/noncwl/Readme.md @@ -53,39 +53,48 @@ to the Platform. ``` $ sbpack_nf -h -usage: sbpack_nf [-h] [--profile PROFILE] --appid APPID --workflow-path WORKFLOW_PATH [--entrypoint ENTRYPOINT] [--sb-package-id SB_PACKAGE_ID] [--sb-inputs SB_INPUTS] [--sb-outputs SB_OUTPUTS] - [--sb-doc SB_DOC] [--dump-sb-app] [--no-package] [--json] [--sb-schema SB_SCHEMA] +usage: nextflow.py [-h] [--profile PROFILE] --appid APPID --workflow-path + WORKFLOW_PATH [--entrypoint ENTRYPOINT] + [--sb-package-id SB_PACKAGE_ID] [--sb-doc SB_DOC] + [--dump-sb-app] [--no-package] + [--executor-version EXECUTOR_VERSION] + [--execution-mode {single-instance,multi-instance}] + [--json] [--sb-schema SB_SCHEMA] + [--revision-note REVISION_NOTE] optional arguments: -h, --help show this help message and exit - --profile PROFILE SB platform profile as set in the SB API credentials file. + --profile PROFILE SB platform profile as set in the SB API credentials + file. --appid APPID Takes the form {user or division}/{project}/{app_id}. - --entrypoint ENTRYPOINT - Relative path to the workflow from the main workflow directory. If not provided, 'main.nf' will be used if available. If not available, any '.nf' located in the workflow-path will be used. --workflow-path WORKFLOW_PATH Path to the main workflow directory + --entrypoint ENTRYPOINT + Relative path to the workflow from the main workflow + directory. If not provided, 'main.nf' will be used if + available. If not available, but a single '*.nf' is + located in the workflow-path will be used. If more + than one '*.nf' script is detected, an error is + raised. --sb-package-id SB_PACKAGE_ID Id of an already uploaded package - --sb-inputs SB_INPUTS - Path to pre built sb app inputs schema - --sb-outputs SB_OUTPUTS - Path to pre build sb app outputs schema - --sb-doc SB_DOC Path to a doc file for sb app. If not provided, README.md will be used if available + --sb-doc SB_DOC Path to a doc file for sb app. If not provided, + README.md will be used if available --dump-sb-app Dump created sb app to file if true and exit - --no-package Only provide a sb app schema and a git URL for entrypoint + --no-package Only provide a sb app schema and a git URL for + entrypoint --executor-version EXECUTOR_VERSION - Version of the Nextflow executor to be used with the app. + Version of the Nextflow executor to be used with the + app. + --execution-mode {single-instance,multi-instance} + Execution mode for your application. --json Dump sb app schema in JSON format (YAML by default) --sb-schema SB_SCHEMA - Do not create new schema, use this schema file. It is sb_nextflow_schema in JSON or YAML format. - --output-schema-files OUTPUT_SCHEMA_FILES [OUTPUT_SCHEMA_FILES ...] - Additional output schema files in CWL or tower.yml format. - --input-schema-files INPUT_SCHEMA_FILES [INPUT_SCHEMA_FILES ...] - Additional input schema files in CWL format. - --revision-note REVISION_NOTE [REVISION_NOTE ...] - Revision note to be placed in the CWL schema if the app is uploaded to the sbg platform. - --manual-validation You will have to provide validation for all 'string' type inputs if are string (str), file (file), directory (dir), list of file (files), or list of directory (dirs) type inputs. - + Do not create new schema, use this schema file. It is + sb_nextflow_schema in JSON or YAML format. + --revision-note REVISION_NOTE + Revision note to be placed in the CWL schema if the + app is uploaded to the sbg platform. ``` ### Example @@ -97,24 +106,41 @@ $ sbpack_nf --profile default --workflow-path /path/to/workflow_dir # sbcopy Developed to enable deep copying of Nextflow and WDL apps on the SB platform between projects. -Note: The link between the original, and the new app will not be available. +This tool also allows copying between divisions and platforms. When used to perform this copy action, it requires credentials for both the source and the destination environment. Both are provided to the `--profile` argument in the source, target order. +Note: The link between the original, and the new app will not be available. ### Usage ``` sbcopy -h -usage: sbcopy [-h] [--profile PROFILE] --appid APPID --projectid PROJECTID +usage: copy_app.py [-h] [--profile PROFILE [PROFILE ...]] --appid APPID + --projectid PROJECTID optional arguments: -h, --help show this help message and exit - --profile PROFILE SB platform profile as set in the SB API credentials file. - --appid APPID What to copy? Takes the form {user or division}/{project}/{app_id} or {user or division}/{project}/{app_id}/{revision_no}. + --profile PROFILE [PROFILE ...] + SB platform profile as set in the SB API credentials + file. If you are using sbcopy to copy an app from one + division to another, please provide two profiles - + first profile for the source app (appid), and second + for the destination project (projectid). + --appid APPID What to copy? Takes the form {user or + division}/{project}/{app_id} or {user or + division}/{project}/{app_id}/{revision_no}. --projectid PROJECTID - Where to copy? Takes the form {user or division}/{project} + Where to copy? Takes the form {user or + division}/{project} + ``` -### Example +### Examples +Copying an app from one project to another within the same division. ``` sbcopy --appid division-name/project-name/app-name --projectid division-name/destination-project-name +``` + +Copying an app from one division to another. +``` +sbcopy --profile source_division target_division --appid source-division-name/project-name/app-name --projectid target-division-name/destination-project-name ``` \ No newline at end of file From 94e305613e2150951dcf7bc1d3c20ce9762652ae Mon Sep 17 00:00:00 2001 From: pavlemarinkovic Date: Mon, 13 Nov 2023 10:14:39 +0100 Subject: [PATCH 6/6] Remove __str__ from ExecMode EXTENSIONS yaml_all and json_all do not overlap --- sbpack/noncwl/constants.py | 5 +---- sbpack/noncwl/utils.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sbpack/noncwl/constants.py b/sbpack/noncwl/constants.py index d870282..c0f0b3a 100644 --- a/sbpack/noncwl/constants.py +++ b/sbpack/noncwl/constants.py @@ -12,7 +12,7 @@ class EXTENSIONS: cwl = 'cwl' yaml_all = [yaml, yml, cwl] - json_all = [json, cwl] + json_all = [json] all_ = [yaml, yml, json, cwl] @@ -96,6 +96,3 @@ class EXTENSIONS: class ExecMode(Enum): single = 'single-instance' multi = 'multi-instance' - - def __str__(self): - return self.value diff --git a/sbpack/noncwl/utils.py b/sbpack/noncwl/utils.py index 35f278e..711d613 100644 --- a/sbpack/noncwl/utils.py +++ b/sbpack/noncwl/utils.py @@ -354,7 +354,7 @@ def update_schema_code_package(sb_schema, schema_ext, new_code_package): sb_schema_dict = pack(sb_schema) sb_schema_dict['app_content']['code_package'] = new_code_package - if schema_ext.lower() == EXTENSIONS.json: + if schema_ext.lower() in EXTENSIONS.json_all: with open(sb_schema, 'w') as file: json.dump(sb_schema_dict, file)