diff --git a/sbpack/noncwl/copy.py b/sbpack/noncwl/copy_app.py
similarity index 97%
rename from sbpack/noncwl/copy.py
rename to sbpack/noncwl/copy_app.py
index c0e063f..fae5b0b 100644
--- a/sbpack/noncwl/copy.py
+++ b/sbpack/noncwl/copy_app.py
@@ -1,7 +1,7 @@
 import argparse
 import logging
 import sbpack.lib as lib
-from sevenbridges.errors import NotFound
+# from sevenbridges.errors import NotFound
 from sbpack.noncwl.utils import install_or_upgrade_app
 
 logger = logging.getLogger(__name__)
diff --git a/sbpack/noncwl/nextflow.py b/sbpack/noncwl/nextflow.py
index e1e90ec..f019caf 100644
--- a/sbpack/noncwl/nextflow.py
+++ b/sbpack/noncwl/nextflow.py
@@ -1,5 +1,4 @@
 import re
-import ruamel.yaml
 import json
 import argparse
 import logging
@@ -22,6 +21,7 @@
     update_schema_code_package,
     install_or_upgrade_app,
     validate_inputs,
+    nf_schema_type_mapper,
     GENERIC_FILE_ARRAY_INPUT,
     GENERIC_OUTPUT_DIRECTORY,
     WRAPPER_REQUIREMENTS,
@@ -32,7 +32,6 @@
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
-PACKAGE_SIZE_LIMIT = 100 * 1024 * 1024  # MB
 
 NF_SCHEMA_DEFAULT_NAME = 'nextflow_schema.json'
 
@@ -50,30 +49,6 @@ def __init__(self, workflow_path, dump_schema=False, sb_doc=None):
         self.output_schemas = None
         self.input_schemas = None
 
-    @staticmethod
-    def nf_schema_type_mapper(input_type_string):
-        """
-        Convert nextflow schema input type to CWL
-        """
-        type_ = input_type_string.get('type', 'string')
-        format_ = input_type_string.get('format', '')
-        if type_ == 'string' and 'path' in format_:
-            if format_ == 'file-path':
-                return ['File']
-            if format_ == 'directory-path':
-                return ['Directory']
-            if format_ == 'path':
-                return ['File']
-        if type_ == 'string':
-            return ['string']
-        if type_ == 'integer':
-            return ['int']
-        if type_ == 'number':
-            return ['float']
-        if type_ == 'boolean':
-            return ['boolean']
-        return [type_]
-
     @staticmethod
     def nf_cwl_port_map():
         """
@@ -84,19 +59,43 @@ def nf_cwl_port_map():
             'default': 'sbg:toolDefaultValue',
             'description': 'label',
             'help_text': 'doc',
+            'mimetype': 'format',
+            'fa_icon': 'sbg:icon',
+            'pattern': 'sbg:pattern',
+            'hidden': 'sbg:hidden',
+        }
+
+    @staticmethod
+    def nf_cwl_category_map():
+        """
+        Mappings of nextflow definition fields to SB category fields
+        nextflow_key: cwl_key mapping
+        """
+        return {
+            'title': 'sbg:title',
+            'description': 'sbg:doc',
+            'fa_icon': 'sbg:icon',
         }
 
-    def nf_to_sb_input_mapper(self, port_id, port_data):
+    def nf_to_sb_input_mapper(self, port_id, port_data, category=None):
         """
         Convert a single input from Nextflow schema to SB schema
         """
         sb_input = dict()
         sb_input['id'] = port_id
-        sb_input['type'] = self.nf_schema_type_mapper(port_data)
+        sb_input['type'] = nf_schema_type_mapper(port_data)
         sb_input['type'].append('null')
+        if category:
+            sb_input['sbg:category'] = category
 
         for nf_field, sb_field in self.nf_cwl_port_map().items():
             if nf_field in port_data:
-                sb_input[sb_field] = port_data[nf_field]
+                value = port_data[nf_field]
+                if value == ":" and nf_field == 'default':
+                    # Bug prevents running a task if an input's
+                    # default value is exactly ":"
+                    value = " :"
+                sb_input[sb_field] = value
+
         sb_input['inputBinding'] = {
             'prefix': f'--{port_id}',
         }
@@ -108,10 +107,21 @@ def collect_nf_definition_properties(self, definition):
         definition contains multiple properties
         """
         cwl_inputs = list()
+        sb_category = dict()
+
+        for nf_field, sb_field in self.nf_cwl_category_map().items():
+            if nf_field in definition:
+                sb_category[sb_field] = definition[nf_field]
+
+        input_category = 'Inputs'
+        if 'title' in definition:
+            input_category = sb_category['sbg:title']
+
         for port_id, port_data in definition['properties'].items():
             cwl_inputs.append(self.nf_to_sb_input_mapper(
                 port_id, port_data,
+                category=input_category,
             ))
         # Nextflow schema field "required" lists input_ids
         # for required inputs.
@@ -119,7 +129,7 @@
         # is that some inputs can be contained in the profile. This means
         # that they do not have to be provided explicitly through the
         # command line.
-        return cwl_inputs
+        return cwl_inputs, sb_category
 
     def nf_schema_build(self):
         """
@@ -146,7 +156,7 @@
     @staticmethod
     def file_is_nf_schema(path):
         try:
-            schema = ruamel.yaml.safe_load(path)
+            schema = yaml.safe_load(path)
             if 'definitions' not in schema:
                 return False
             if type(schema['definitions']) is not dict:
@@ -175,15 +185,17 @@ def generate_sb_inputs(self, manual_validation=False):
 
         if self.nf_schema_path:
             with open(self.nf_schema_path, 'r') as f:
-                nf_schema = ruamel.yaml.safe_load(f)
+                nf_schema = yaml.safe_load(f)
 
             for p_key, p_value in nf_schema.get('properties', {}).items():
                 cwl_inputs.append(
                     self.nf_to_sb_input_mapper(p_key, p_value))
             for def_name, definition in nf_schema.get(
                     'definitions', {}).items():
-                cwl_inputs.extend(
-                    self.collect_nf_definition_properties(definition))
+                inputs, category = self.collect_nf_definition_properties(
+                    definition)
+                cwl_inputs.extend(inputs)
+                # add category to schema
 
         if self.input_schemas:
             for file in self.input_schemas:
@@ -354,7 +366,7 @@ def parse_output_yml(self, yml_file):
         :return: list of outputs in CWL format.
         """
         outputs = list()
-        yml_schema = ruamel.yaml.safe_load(yml_file)
+        yml_schema = yaml.safe_load(yml_file)
 
         for key, value in yml_schema.items():
             # Tower yml file can use "tower" key in the yml file to designate
@@ -525,7 +537,7 @@ def main():
     )
     parser.add_argument(
         "--revision-note", required=False,
-        default=None, type=str, nargs="+",
+        default=None, type=str,
        help="Revision note to be placed in the CWL schema if the app is "
             "uploaded to the sbg platform.",
     )
@@ -608,10 +620,9 @@ def main():
 
     revision_note = f"Uploaded using sbpack v{__version__}"
 
     if args.revision_note:
-        revision_note = str(" ".join(args.revision_note))
+        revision_note = str(args.revision_note)
 
-    if not args.sb_schema:
-        sb_app["sbg:revisionNotes"] = revision_note
+    sb_app["sbg:revisionNotes"] = revision_note
 
     install_or_upgrade_app(api, args.appid, sb_app)
diff --git a/sbpack/noncwl/utils.py b/sbpack/noncwl/utils.py
index 0513de6..9cee09d 100644
--- a/sbpack/noncwl/utils.py
+++ b/sbpack/noncwl/utils.py
@@ -5,12 +5,14 @@
 import json
 import yaml
 import re
+
+from sbpack.pack import pack
 from sevenbridges.errors import NotFound
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
-PACKAGE_SIZE_LIMIT = 100 * 1024 * 1024
+PACKAGE_SIZE_LIMIT = 256 * 1024 * 1024 - 1  # ~256 MB
 
 # A generic SB input array of files that should be available on the
 # instance but are not explicitly provided to the execution as wdl params.
@@ -67,6 +69,45 @@ class EXTENSIONS:
     all_ = [yaml, yml, json, cwl]
 
 
+def nf_schema_type_mapper(input_type_string):
+    """
+    Convert nextflow schema input type to CWL
+    """
+    type_ = input_type_string.get('type', 'string')
+    format_ = input_type_string.get('format', '')
+
+    return type_mapper(type_, format_)
+
+
+def type_mapper(type_, format_):
+    if isinstance(type_, str):
+        if type_ == 'string' and 'path' in format_:
+            if format_ == 'file-path':
+                return ['File']
+            if format_ == 'directory-path':
+                return ['Directory']
+            if format_ == 'path':
+                return ['File']
+        if type_ == 'string':
+            return ['string']
+        if type_ == 'integer':
+            return ['int']
+        if type_ == 'number':
+            return ['float']
+        if type_ == 'boolean':
+            return ['boolean']
+        if type_ == 'object':
+            # this should be a record type (dictionary)
+            # it is provided as '{"key1": "value1", "key2": "value2"}'
+            return ['string']
+        return [type_]
+    elif isinstance(type_, list):
+        temp_type_list = []
+        for m in type_:
+            temp_type_list.extend(type_mapper(m, format_))
+        return temp_type_list
+
+
 def create_profile_enum(profiles: list):
     """
     If profiles are defined in the config file, this input stores the profiles
@@ -151,12 +192,41 @@ def zip_and_push_to_sb(api, workflow_path, project_id, folder_name):
     for packages on SevenBridges Platform.
     Delete local .zip file.
     """
-    basename = os.path.basename(os.path.abspath(workflow_path)) + '_' + \
-        time.strftime("%Y%m%d-%H%M%S")
-
-    zip_path = os.path.join(os.path.dirname(workflow_path), basename + '.zip')
-    shutil.make_archive(zip_path[:-4], 'zip', root_dir=workflow_path,
-                        base_dir='./')
+    # This will create a temporary directory that will store all files from the
+    # original directory, except for the .git hidden directory. This dir
+    # sometimes collects a large amount of files that will not be used by the
+    # tool, and can increase the size of the archive up to 10 times.
+
+    source_path = os.path.abspath(workflow_path)
+    destination_path = source_path + '_' + time.strftime("%Y%m%d-%H%M%S")
+    zip_path = destination_path + '.zip'
+    os.mkdir(destination_path)
+
+    for root, dirs, files in os.walk(workflow_path):
+        pattern = re.compile(r'(?:^|.*/)\.git(?:$|/.*)')
+        if re.match(pattern, root):
+            continue
+
+        dirs = [d for d in dirs if not re.match(pattern, d)]
+        for d in dirs:
+            source_file = os.path.join(root, d)
+            directory_path = os.path.join(destination_path, os.path.relpath(
+                source_file, workflow_path))
+            if not os.path.exists(directory_path):
+                os.mkdir(directory_path)
+
+        for file in files:
+            source_file = os.path.join(root, file)
+            dest_file = os.path.join(destination_path, os.path.relpath(
+                source_file, workflow_path))
+            shutil.copy2(source_file, dest_file)
+
+    shutil.make_archive(
+        destination_path,
+        'zip',
+        root_dir=destination_path,
+        base_dir='./'
+    )
 
     if os.path.getsize(zip_path) > PACKAGE_SIZE_LIMIT:
         logger.error(f"File size too big: {os.path.getsize(zip_path)}")
@@ -184,7 +254,10 @@ def zip_and_push_to_sb(api, workflow_path, project_id, folder_name):
     print(f'Upload complete!')
 
     os.remove(zip_path)
-    print(f'Local file {zip_path} deleted.')
+    print(f'Temporary local file {zip_path} deleted.')
+
+    shutil.rmtree(destination_path)
+    print(f'Temporary local folder {destination_path} deleted.')
 
     return uploaded_file_id
@@ -249,10 +322,11 @@ def parse_config_file(file_path):
     with open(file_path, 'r') as file:
         config = file.read()
 
-    trace_pattern = re.compile(r"trace\s\{.*}", re.MULTILINE | re.DOTALL)
-    if re.findall(trace_pattern, config):
-        logger.warning("Detected `trace` in nextflow config. This "
-                       "functionality is currently not supported.")
+    # Trace
+    # trace_pattern = re.compile(r"trace\s\{.*}", re.MULTILINE | re.DOTALL)
+    # if re.findall(trace_pattern, config):
+    #     logger.warning("Detected `trace` in nextflow config. This "
+    #                    "functionality is currently not supported.")
 
     found_profiles = False
     brackets = 0
@@ -270,10 +344,12 @@
 
     # Extract profiles using regex
     profiles = {}
-    pattern = re.compile(r'^\s*(\w+)\s*{([^}]+)}', re.MULTILINE | re.DOTALL)
+    pattern = re.compile(r'\s*(\w+)\s*{([^}]+)}', re.MULTILINE | re.DOTALL)
     blocks = re.findall(pattern, profiles_text)
 
     for name, content in blocks:
-        settings = dict(re.findall(r'\s*([a-zA-Z.]+)\s*=\s*(.*)', content))
+        settings = dict(
+            re.findall(r'([a-zA-Z._]+)(?:\s+|)=(?:\s+|)([^\s]+)', content)
+        )
         profiles[name] = settings
         include_path = re.findall(
             r'includeConfig\s+[\'\"]([a-zA-Z_.\\/]+)[\'\"]', content)
@@ -289,23 +365,19 @@ def update_schema_code_package(sb_schema, schema_ext, new_code_package):
     """
     Update the package in the sb_schema
     """
+
+    sb_schema_dict = pack(sb_schema)
+    sb_schema_dict['app_content']['code_package'] = new_code_package
+
     if schema_ext.lower() == EXTENSIONS.json:
-        with open(sb_schema, 'r') as file:
-            sb_schema_json = json.load(file)
-        sb_schema_json['app_content']['code_package'] = new_code_package
         with open(sb_schema, 'w') as file:
-            json.dump(sb_schema_json, file)
-
-        return sb_schema_json
+            json.dump(sb_schema_dict, file)
 
     elif schema_ext.lower() in EXTENSIONS.yaml_all:
-        with open(sb_schema, 'r') as file:
-            sb_schema_yaml = yaml.safe_load(file)
-        sb_schema_yaml['app_content']['code_package'] = new_code_package
         with open(sb_schema, 'w') as file:
-            yaml.dump(sb_schema_yaml, file)
+            yaml.dump(sb_schema_dict, file)
 
-        return sb_schema_yaml
+    return sb_schema_dict
 
 
 def install_or_upgrade_app(api, app_id, sb_app_raw):
diff --git a/sbpack/version.py b/sbpack/version.py
index 2109ac4..3a6d6bd 100644
--- a/sbpack/version.py
+++ b/sbpack/version.py
@@ -1 +1 @@
-__version__ = "2023.06.20rc3"
+__version__ = "2023.06.20rc4"
diff --git a/setup.py b/setup.py
index 361b957..88e4033 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@
             'sbpull = sbpack.unpack:main',
             'sbpack_nf = sbpack.noncwl.nextflow:main',
             'sbpack_wdl = sbpack.noncwl.wdl:main',
-            'sbcopy = sbpack.noncwl.copy:main',
+            'sbcopy = sbpack.noncwl.copy_app:main',
         ],
     },
diff --git a/tests/test_packing.py b/tests/test_packing.py
index a1957f3..84dde34 100644
--- a/tests/test_packing.py
+++ b/tests/test_packing.py
@@ -25,7 +25,11 @@ def test_include():
     assert "arguments" in cwl
     assert isinstance(cwl.get("arguments"), list)
 
-    inline_js_req = _find(cwl.get("requirements"), "class", "InlineJavascriptRequirement")
+    inline_js_req = _find(
+        cwl.get("requirements"),
+        "class",
+        "InlineJavascriptRequirement"
+    )
     include_js = inline_js_req.get("expressionLib")
 
     assert isinstance(include_js, list)
@@ -61,7 +65,10 @@ def test_embedded_packing():
 
 def test_embedded_packing_with_ids():
     cwl = pack("workflows/count-lines16-wf.cwl", add_ids=True)
-    assert cwl["steps"][0]["run"]["id"] == "count-lines16-wf.cwl@step_step1@run"
+    assert (
+        cwl["steps"][0]["run"]["id"]
+        == "count-lines16-wf.cwl@step_step1@run"
+    )
     assert (
         cwl["steps"][0]["run"]["steps"][0]["run"]["id"]
         == "count-lines16-wf.cwl@step_step1@wc-tool.cwl"
@@ -81,7 +88,10 @@ def test_step_process_id():
 
 
 def test_remote_packing():
-    cwl = pack("https://raw.githubusercontent.com/kaushik-work/sbpack/master/tests/wf2.cwl")
+    cwl = pack(
+        "https://raw.githubusercontent.com/kaushik-work/"
+        "sbpack/master/tests/wf2.cwl"
+    )
     s1 = _find(cwl.get("steps"), "id", "s1")
     wf1 = s1.get("run")
     assert wf1.get("class") == "Workflow"
@@ -93,11 +103,15 @@ def test_remote_packing():
 
 
 def test_remote_packing_github_soft_links():
-    cwl = pack("https://raw.githubusercontent.com/rabix/sbpack/master/tests/workflows/wf5.cwl")
+    cwl = pack(
+        "https://raw.githubusercontent.com/rabix/"
+        "sbpack/master/tests/workflows/wf5.cwl"
+    )
     s1 = _find(cwl.get("steps"), "id", "s1")
     tool1 = s1.get("run")
     assert tool1.get("class") == "CommandLineTool"
 
+
 def test_already_packed_graph():
     """Workflow already packed in a $graph."""
     cwl = pack("workflows/scatter-wf4.cwl")
@@ -106,6 +120,7 @@ def test_already_packed_graph():
     assert "$graph" in cwl
     assert "requirements" not in cwl
 
+
 def test_import_in_type():
     cwl = pack("workflows/import-in-type.cwl")
     assert cwl["inputs"][0]["type"] == ["File", "Directory"]
diff --git a/tests/test_pulling.py b/tests/test_pulling.py
index f441beb..2223281 100644
--- a/tests/test_pulling.py
+++ b/tests/test_pulling.py
@@ -7,7 +7,13 @@ def test_pull():
     with tempfile.TemporaryDirectory() as td:
         out_dir = pathlib.Path(td)
-        subprocess.run(["sbpull", "--unpack", ".", "admin/sbg-public-data/eqtl-analysis-with-fastqtl-gtex-v7/", str(out_dir / "eqtl.cwl")], check=True)
+        subprocess.run([
+            "sbpull",
+            "--unpack",
+            ".",
+            "admin/sbg-public-data/eqtl-analysis-with-fastqtl-gtex-v7/",
+            str(out_dir / "eqtl.cwl"),
+        ], check=True)
 
         assert (out_dir / "eqtl.cwl").exists()
         assert (out_dir / "eqtl.cwl.steps").exists()
diff --git a/tests/test_schemadefs.py b/tests/test_schemadefs.py
index de5d172..43659c8 100644
--- a/tests/test_schemadefs.py
+++ b/tests/test_schemadefs.py
@@ -1,5 +1,3 @@
-import urllib.parse
-
 from sbpack.pack import pack
 
 
diff --git a/tests/test_validation_battery.py b/tests/test_validation_battery.py
index c92bbad..f68a83e 100644
--- a/tests/test_validation_battery.py
+++ b/tests/test_validation_battery.py
@@ -23,17 +23,29 @@ def cwl_is_valid(fname):
 
 @pytest.mark.parametrize(
     ('f',),
-    [("tools/clt1.cwl",), ("tools/clt2.cwl",), ("tools/clt3.cwl",),
-     ("workflows/wf1.cwl",), ("workflows/wf2.cwl",), ("workflows/wf4.cwl",),
-     ("workflows/wf-with-git.cwl",),
-     ("https://raw.githubusercontent.com/rabix/sbpack/master/tests/workflows/wf1.cwl",),
-     ("https://raw.githubusercontent.com/rabix/sbpack/master/tests/workflows/wf2.cwl",),
-     ("https://raw.githubusercontent.com/rabix/sbpack/master/tests/workflows/wf4.cwl",),
-     ("remote-cwl/tool1.cwl",), ("remote-cwl/tool2.cwl",), ("remote-cwl/wf1.cwl",)
-     ]
+    [
+        ("tools/clt1.cwl",),
+        ("tools/clt2.cwl",),
+        ("tools/clt3.cwl",),
+        ("workflows/wf1.cwl",),
+        ("workflows/wf2.cwl",),
+        ("workflows/wf4.cwl",),
+        ("workflows/wf-with-git.cwl",),
+        ("https://raw.githubusercontent.com/rabix/"
+         "sbpack/master/tests/workflows/wf1.cwl",),
+        ("https://raw.githubusercontent.com/rabix/"
+         "sbpack/master/tests/workflows/wf2.cwl",),
+        ("https://raw.githubusercontent.com/rabix/"
+         "sbpack/master/tests/workflows/wf4.cwl",),
+        ("remote-cwl/tool1.cwl",),
+        ("remote-cwl/tool2.cwl",),
+        ("remote-cwl/wf1.cwl",)
+    ]
+)
+@pytest.mark.skipif(
+    sys.platform == 'win32',
+    reason='Skip on windows due to errors in cwltool with import pwd'
 )
-@pytest.mark.skipif(sys.platform == 'win32',
-                    reason='Skip on windows due to errors in cwltool with import pwd')
 def test_local_packing_with_validation(f):
     url = urllib.parse.urlparse(f)
     packed_name = pathlib.Path(url.path).stem + "-packed.cwl"