From a2446eb14b3e542e47fc1537a7a6cd8660cf8aac Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Mon, 15 Jul 2024 01:44:09 +0700 Subject: [PATCH 1/3] Allow adding multiple biosample --- bin/combine_parsed_data.py | 22 +++++++++-- bin/combine_statistics_data.py | 6 ++- modules/local/combine_metadata.nf | 16 +++++++- modules/local/parse_metadata.nf | 2 +- modules/local/run_wget.nf | 5 ++- subworkflows/local/genome_metadata.nf | 55 +++++++++++++++++++-------- 6 files changed, 80 insertions(+), 26 deletions(-) diff --git a/bin/combine_parsed_data.py b/bin/combine_parsed_data.py index b30d4962..b99f9482 100755 --- a/bin/combine_parsed_data.py +++ b/bin/combine_parsed_data.py @@ -55,7 +55,10 @@ def process_file(file_in, params): if any(p in string.punctuation for p in value): value = '"' + value + '"' - source_dict[key] = value + if key in source_dict: + source_dict[key].append(value) + else: + source_dict[key] = [value] if key in params: params[key].append(value) @@ -72,8 +75,18 @@ def main(args=None): params_inconsistent = {} for file in files: - (params, paramDict) = process_file(getattr(args, file[1]), params) - param_sets[file[0]] = paramDict + file_list = getattr(args, file[1]) + if file_list: + for single_file in file_list.split(','): + (params, paramDict) = process_file(single_file, params) + if file[0] not in param_sets: + param_sets[file[0]] = paramDict + else: + for key, values in paramDict.items(): + if key in param_sets[file[0]]: + param_sets[file[0]][key].extend(values) + else: + param_sets[file[0]][key] = values for key in params.keys(): value_set = {v for v in params[key]} @@ -82,7 +95,8 @@ def main(args=None): for source in param_sets: if key in param_sets[source]: - params_inconsistent[key].append((source, param_sets[source][key])) + for val in param_sets[source][key]: + params_inconsistent[key].append((source, val)) # Strip inconsistent data from parameter list for i in params_inconsistent.keys(): diff --git a/bin/combine_statistics_data.py 
b/bin/combine_statistics_data.py index 2e48c1dc..948f68cc 100755 --- a/bin/combine_statistics_data.py +++ b/bin/combine_statistics_data.py @@ -82,7 +82,11 @@ def process_inconsistent_file(file, params, inconsistent, consistent): else: key = row.pop(0) - if consistent.get(key) is None: + if key in inconsistent: + # Append to existing inconsistent values + inconsistent[key].extend(row) + else: + # Add new inconsistent values inconsistent[key] = row return inconsistent diff --git a/modules/local/combine_metadata.nf b/modules/local/combine_metadata.nf index 1c3af9d4..ecada08a 100644 --- a/modules/local/combine_metadata.nf +++ b/modules/local/combine_metadata.nf @@ -21,12 +21,24 @@ process COMBINE_METADATA { script: def args = [] def prefix = task.ext.prefix ?: meta.id + def biosample_files = [] + for (item in file_list){ def file = item def file_ext = item.getExtension() def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}") + "_file" - args.add(file_name) - args.add(file) + + if (file_name.contains("biosample")) { + biosample_files.add(file) + } else { + args.add(file_name) + args.add(file) + } + } + + if (!biosample_files.isEmpty()) { + args.add("--ena_biosample_file") + args.add(biosample_files.join(",")) } """ diff --git a/modules/local/parse_metadata.nf b/modules/local/parse_metadata.nf index d9f85ded..6383bc81 100644 --- a/modules/local/parse_metadata.nf +++ b/modules/local/parse_metadata.nf @@ -22,7 +22,7 @@ process PARSE_METADATA { script: // This script is bundled with the pipeline, in nf-core/genomenote/bin/ def prefix = task.ext.prefix ?: meta.id def script_name = "parse_${meta.ext.toLowerCase()}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.py" - def output_file = "${prefix}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.csv" + def output_file = "${prefix}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}${meta.biosample}.csv" """ $script_name \\ $json \\ diff --git a/modules/local/run_wget.nf 
b/modules/local/run_wget.nf index 8211f55a..cbd6c1f2 100644 --- a/modules/local/run_wget.nf +++ b/modules/local/run_wget.nf @@ -14,7 +14,7 @@ process RUN_WGET { output: - tuple val(meta), path("${meta.id}_${meta.source}_${meta.type}.${meta.ext}") , emit: file_path + tuple val(meta), path("${meta.id}_${meta.source}_${meta.type}*.${meta.ext}") , emit: file_path path "versions.yml" , emit: versions when: @@ -22,8 +22,9 @@ process RUN_WGET { script: def no_certificate = (meta.source == 'GOAT') ? '--no-check-certificate' : '' + def output = "${meta.id}_${meta.source}_${meta.type}${meta.biosample}.${meta.ext}".strip('_') """ - wget ${no_certificate} -c -O ${meta.id}_${meta.source}_${meta.type}.${meta.ext} '${url}' + wget ${no_certificate} -c -O ${output} '${url}' cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/genome_metadata.nf b/subworkflows/local/genome_metadata.nf index df68a782..69b8a924 100644 --- a/subworkflows/local/genome_metadata.nf +++ b/subworkflows/local/genome_metadata.nf @@ -15,37 +15,60 @@ workflow GENOME_METADATA { main: ch_versions = Channel.empty() - + // Define channel for RUN_WGET ch_file_list | splitCsv(header: ['source', 'type', 'url', 'ext'], skip: 1) - | map { row -> - [ - // meta - [ id: params.assembly, - taxon_id: params.taxon_id, - source: row.source, - type: row.type, - ext: row.ext, - ], - // url - row.url + | flatMap { row -> + // Create a list to hold the final entries + def entries = [] + + // Common metadata + def metadata = [ + id: params.assembly, + taxon_id: params.taxon_id, + source: row.source, + type: row.type, + ext: row.ext + ] + + // Process each biosample + params.biosample.split(',').each { biosample -> + def url = row.url .replaceAll(/ASSEMBLY_ACCESSION/, params.assembly) .replaceAll(/TAXONOMY_ID/, params.taxon_id) .replaceAll(/BIOPROJECT_ACCESSION/, params.bioproject) - .replaceAll(/BIOSAMPLE_ACCESSION/, params.biosample) - ] + .replaceAll(/BIOSAMPLE_ACCESSION/, biosample.trim()) + + if 
(row.type == 'Biosample') { + // Add entry with biosample in metadata for Biosample type + entries << [ + metadata + [biosample: biosample.trim()], + url + ] + } else { + // Add entry without biosample in metadata for other types + entries << [ + metadata + [biosample: ''], + url + ] + } + } + + return entries } + | unique() | set { file_list } + file_list.view() // Fetch files RUN_WGET ( file_list ) - ch_versions = ch_versions.mix( RUN_WGET.out.versions.first() ) + ch_versions = ch_versions.mix( RUN_WGET.out.versions.first() ) PARSE_METADATA(RUN_WGET.out.file_path) ch_versions = ch_versions.mix( PARSE_METADATA.out.versions.first() ) - PARSE_METADATA.out.file_path + PARSE_METADATA.out.file_path | map { it -> tuple( it[1] )} | collect | map { it -> From bebcec70d0a3fd5c29d20a07b196b708e8515e12 Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Tue, 30 Jul 2024 11:44:11 +0700 Subject: [PATCH 2/3] Rename params.biosample to biosample_wgs, add params.biosample_rna and params.biosample_hic --- bin/combine_parsed_data.py | 30 ++++++----------- bin/combine_statistics_data.py | 6 +--- bin/parse_xml_ena_biosample.py | 19 ++++++++--- conf/test.config | 4 ++- conf/test_full.config | 4 ++- docs/usage.md | 8 +++-- modules/local/combine_metadata.nf | 18 ++-------- modules/local/parse_metadata.nf | 3 +- modules/local/run_wget.nf | 3 +- nextflow.config | 4 ++- nextflow_schema.json | 12 +++++-- subworkflows/local/genome_metadata.nf | 48 ++++++++++++++++----------- workflows/genomenote.nf | 4 +-- 13 files changed, 86 insertions(+), 77 deletions(-) diff --git a/bin/combine_parsed_data.py b/bin/combine_parsed_data.py index b99f9482..3179a7f8 100755 --- a/bin/combine_parsed_data.py +++ b/bin/combine_parsed_data.py @@ -10,7 +10,9 @@ files = [ ("ENA_ASSEMBLY", "ena_assembly_file"), ("ENA_BIOPROJECT", "ena_bioproject_file"), - ("ENA_BIOSAMPLE", "ena_biosample_file"), + ("ENA_BIOSAMPLE", "ena_biosample_wgs_file"), + ("ENA_BIOSAMPLE_HIC", "ena_biosample_hic_file"), + ("ENA_BIOSAMPLE_RNA", 
"ena_biosample_rna_file"), ("ENA_TAXONOMY", "ena_taxonomy_file"), ("NCBI_ASSEMBLY", "ncbi_assembly_file"), ("NCBI_TAXONOMY", "ncbi_taxonomy_file"), @@ -25,7 +27,9 @@ def parse_args(args=None): parser = argparse.ArgumentParser(description=Description, epilog=Epilog) parser.add_argument("--ena_assembly_file", help="Input parsed ENA assembly file.", required=False) parser.add_argument("--ena_bioproject_file", help="Input parsed ENA assembly file.", required=False) - parser.add_argument("--ena_biosample_file", help="Input parsed ENA assembly file.", required=False) + parser.add_argument("--ena_biosample_wgs_file", help="Input parsed ENA genomic biosample file.", required=False) + parser.add_argument("--ena_biosample_hic_file", help="Input parsed ENA HiC biosample file.", required=False) + parser.add_argument("--ena_biosample_rna_file", help="Input parsed ENA RNASeq biosample file.", required=False) parser.add_argument("--ena_taxonomy_file", help="Input parsed ENA assembly file.", required=False) parser.add_argument("--ncbi_assembly_file", help="Input parsed ENA assembly file.", required=False) parser.add_argument("--ncbi_taxonomy_file", help="Input parsed ENA assembly file.", required=False) @@ -55,10 +59,7 @@ def process_file(file_in, params): if any(p in string.punctuation for p in value): value = '"' + value + '"' - if key in source_dict: - source_dict[key].append(value) - else: - source_dict[key] = [value] + source_dict[key] = value if key in params: params[key].append(value) @@ -75,18 +76,8 @@ def main(args=None): params_inconsistent = {} for file in files: - file_list = getattr(args, file[1]) - if file_list: - for single_file in file_list.split(','): - (params, paramDict) = process_file(single_file, params) - if file[0] not in param_sets: - param_sets[file[0]] = paramDict - else: - for key, values in paramDict.items(): - if key in param_sets[file[0]]: - param_sets[file[0]][key].extend(values) - else: - param_sets[file[0]][key] = values + (params, paramDict) = 
process_file(getattr(args, file[1]), params) + param_sets[file[0]] = paramDict for key in params.keys(): value_set = {v for v in params[key]} @@ -95,8 +86,7 @@ def main(args=None): for source in param_sets: if key in param_sets[source]: - for val in param_sets[source][key]: - params_inconsistent[key].append((source, val)) + params_inconsistent[key].append((source, param_sets[source][key])) # Strip inconsistent data from parameter list for i in params_inconsistent.keys(): diff --git a/bin/combine_statistics_data.py b/bin/combine_statistics_data.py index 948f68cc..2e48c1dc 100755 --- a/bin/combine_statistics_data.py +++ b/bin/combine_statistics_data.py @@ -82,11 +82,7 @@ def process_inconsistent_file(file, params, inconsistent, consistent): else: key = row.pop(0) - if key in inconsistent: - # Append to existing inconsistent values - inconsistent[key].extend(row) - else: - # Add new inconsistent values + if consistent.get(key) is None: inconsistent[key] = row return inconsistent diff --git a/bin/parse_xml_ena_biosample.py b/bin/parse_xml_ena_biosample.py index bd0390c5..2c55c924 100755 --- a/bin/parse_xml_ena_biosample.py +++ b/bin/parse_xml_ena_biosample.py @@ -43,7 +43,7 @@ def parse_args(args=None): - Description = "Parse contents of an ENA SAMPLE report and pul out meta data required by a genome note." + Description = "Parse contents of an ENA SAMPLE report and pull out meta data required by a genome note." 
Epilog = "Example usage: python parse_xml_ena_sample.py " parser = argparse.ArgumentParser(description=Description, epilog=Epilog) @@ -77,6 +77,13 @@ def parse_xml(file_in, file_out): root = tree.getroot() param_list = [] + # Extract biosample type from FILE_OUT + biosample_type = None + if "HIC" in file_out.upper(): + biosample_type = "HIC" + elif "RNA" in file_out.upper(): + biosample_type = "RNA" + for f in fetch: param = None r = root @@ -101,8 +108,7 @@ def parse_xml(file_in, file_out): param = r.attrib.get(f[2][1]) except ValueError: param = None - - ## Count child elements with specfic tag + ## Count child elements with specific tag if f[2][0] == "count": if r is not None: param = str(len(r.findall(f[2][1]))) if len(r.findall(f[2][1])) != 0 else None @@ -129,8 +135,11 @@ def parse_xml(file_in, file_out): if any(p in string.punctuation for p in param): param = '"' + param + '"' - - param_list.append([f[0], param]) + # Prefix parameter name if biosample type is HiC or RNA + param_name = f[0] + if biosample_type in ["HIC", "RNA"]: + param_name = f"{biosample_type}_{param_name}" + param_list.append([param_name, param]) if len(param_list) > 0: out_dir = os.path.dirname(file_out) diff --git a/conf/test.config b/conf/test.config index 93b044d3..aef2428f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -33,7 +33,9 @@ params { species = 'Cloeon_dipterum' taxon_id = '197152' bioproject = 'PRJEB45177' - biosample = 'SAMEA7520803' + biosample_wgs = 'SAMEA7520803' + biosample_hic = 'SAMEA7520846' + biosample_rna = 'SAMEA7521081' // Genome Notes Portal write_to_portal = false diff --git a/conf/test_full.config b/conf/test_full.config index 99f7c747..6f151b91 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -33,7 +33,9 @@ params { species = 'Ypsolopha_sequella' taxon_id = '1870436' bioproject = 'PRJEB50740' - biosample = 'SAMEA7519929' + biosample_wgs = 'SAMEA7519929' + biosample_hic = 'SAMEA7519968' + biosample_rna = null // Genome Notes Portal 
write_to_portal = false diff --git a/docs/usage.md b/docs/usage.md index d2cc74d4..295ac70a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -25,7 +25,9 @@ You will need to supply the assembly accession for the genome you would like to --species '[species name]' --taxon_id '[taxon id]' --bioproject '[bioproject accession]' - --biosample '[biosample accession]' + --biosample_wgs '[biosample accession of the biosample used to produce the genomic sequence]' + --biosample_hic '[biosample accession of the biosample used to produce the HiC data]' + --biosample_rna '[biosample accession of the biosample used to produce the RNASeq data]' ``` If you wish to run the optional step that writes genome metatdata key value-pairs to a genome notes databases you will need to set the parameter "write_to_portal" to true and provide the base url for the REST API that writes to the database. @@ -101,7 +103,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run sanger-tol/genomenote --input samplesheet.csv --outdir --fasta genome.fasta --assembly GCA_922984935.2 --species Epithemia_sp._CRS-2021b --taxon_id 2809013 --bioproject PRJEB49353 --biosample SAMEA7524400 -profile docker +nextflow run sanger-tol/genomenote --input samplesheet.csv --outdir --fasta genome.fasta --assembly GCA_922984935.2 --species Epithemia_sp._CRS-2021b --taxon_id 2809013 --bioproject PRJEB49353 --biosample_wgs SAMEA7524400 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
@@ -137,7 +139,7 @@ assembly: 'GCA_922984935.2' species: 'Epithemia_sp._CRS-2021b' taxon_id: '2809013' bioproject: 'PRJEB49353' -biosample: 'SAMEA7524400' +biosample_wgs: 'SAMEA7524400' <...> ``` diff --git a/modules/local/combine_metadata.nf b/modules/local/combine_metadata.nf index ecada08a..9011f0f2 100644 --- a/modules/local/combine_metadata.nf +++ b/modules/local/combine_metadata.nf @@ -21,24 +21,12 @@ process COMBINE_METADATA { script: def args = [] def prefix = task.ext.prefix ?: meta.id - def biosample_files = [] - for (item in file_list){ def file = item def file_ext = item.getExtension() - def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}") + "_file" - - if (file_name.contains("biosample")) { - biosample_files.add(file) - } else { - args.add(file_name) - args.add(file) - } - } - - if (!biosample_files.isEmpty()) { - args.add("--ena_biosample_file") - args.add(biosample_files.join(",")) + def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}").toLowerCase() + "_file" + args.add(file_name) + args.add(file) } """ diff --git a/modules/local/parse_metadata.nf b/modules/local/parse_metadata.nf index 6383bc81..0f895a86 100644 --- a/modules/local/parse_metadata.nf +++ b/modules/local/parse_metadata.nf @@ -22,7 +22,8 @@ process PARSE_METADATA { script: // This script is bundled with the pipeline, in nf-core/genomenote/bin/ def prefix = task.ext.prefix ?: meta.id def script_name = "parse_${meta.ext.toLowerCase()}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.py" - def output_file = "${prefix}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}${meta.biosample}.csv" + def is_biosample = (meta.biosample_type == "WGS" || meta.biosample_type == "HIC" || meta.biosample_type == "RNA") ? 
"_${meta.biosample_type}" : "" + def output_file = "${prefix}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}${is_biosample}.csv".strip('_') """ $script_name \\ $json \\ diff --git a/modules/local/run_wget.nf b/modules/local/run_wget.nf index cbd6c1f2..6ae86b9f 100644 --- a/modules/local/run_wget.nf +++ b/modules/local/run_wget.nf @@ -22,7 +22,8 @@ process RUN_WGET { script: def no_certificate = (meta.source == 'GOAT') ? '--no-check-certificate' : '' - def output = "${meta.id}_${meta.source}_${meta.type}${meta.biosample}.${meta.ext}".strip('_') + def is_biosample = (meta.biosample_type == "WGS" || meta.biosample_type == "HIC" || meta.biosample_type == "RNA") ? "_${meta.biosample_type}" : "" + def output = "${meta.id}_${meta.source}_${meta.type}${is_biosample}.${meta.ext}".strip('_') """ wget ${no_certificate} -c -O ${output} '${url}' diff --git a/nextflow.config b/nextflow.config index ae7dc43d..c0e9ee66 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,7 +19,9 @@ params { species = null taxon_id = null bioproject = null - biosample = null + biosample_wgs = null + biosample_hic = null + biosample_rna = null // Genome Notes write_to_portal = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 3fd4962b..9525f1d8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -48,9 +48,17 @@ "type": "string", "description": "The bioproject accesion linked to the genome assembly, for example: PRJEB49353." }, - "biosample": { + "biosample_wgs": { "type": "string", - "description": "The biosample accesion(s) linked to the samples in the experiment, for example: SAMEA7524400." + "description": "The biosample accession(s) linked to the WGS samples in the experiment, for example: SAMEA7520803." + }, + "biosample_rna": { + "type": "string", + "description": "The biosample accession(s) linked to the RNA samples in the experiment, for example: SAMEA7521081." 
+ }, + "biosample_hic": { + "type": "string", + "description": "The biosample accession(s) linked to the Hi-C samples in the experiment, for example: SAMEA7520846." }, "outdir": { "type": "string", diff --git a/subworkflows/local/genome_metadata.nf b/subworkflows/local/genome_metadata.nf index 69b8a924..673a99ab 100644 --- a/subworkflows/local/genome_metadata.nf +++ b/subworkflows/local/genome_metadata.nf @@ -32,34 +32,42 @@ workflow GENOME_METADATA { ext: row.ext ] + // Define biosamples with their types + def biosamples = [ + ["WGS", params.biosample_wgs], + ["HIC", params.biosample_hic], + ["RNA", params.biosample_rna] + ] + // Process each biosample - params.biosample.split(',').each { biosample -> - def url = row.url - .replaceAll(/ASSEMBLY_ACCESSION/, params.assembly) - .replaceAll(/TAXONOMY_ID/, params.taxon_id) - .replaceAll(/BIOPROJECT_ACCESSION/, params.bioproject) - .replaceAll(/BIOSAMPLE_ACCESSION/, biosample.trim()) + biosamples.each { biosampleType, biosampleID -> + if ( biosampleID != null ) { + // Null biosample IDs are skipped + def url = row.url + .replaceAll(/ASSEMBLY_ACCESSION/, params.assembly) + .replaceAll(/TAXONOMY_ID/, params.taxon_id) + .replaceAll(/BIOPROJECT_ACCESSION/, params.bioproject) + .replaceAll(/BIOSAMPLE_ACCESSION/, biosampleID) - if (row.type == 'Biosample') { - // Add entry with biosample in metadata for Biosample type - entries << [ - metadata + [biosample: biosample.trim()], - url - ] - } else { - // Add entry without biosample in metadata for other types - entries << [ - metadata + [biosample: ''], - url - ] + if (row.type == 'Biosample') { + // Add entry with biosample type in metadata for Biosample type + entries << [ + metadata + [biosample_type: biosampleType], + url + ] + } else { + // Add entry without biosample type in metadata for other types + entries << [ + metadata + [biosample_type: ''], + url + ] + } } } - return entries } | unique() | set { file_list } - file_list.view() // Fetch files RUN_WGET ( file_list ) diff 
--git a/workflows/genomenote.nf b/workflows/genomenote.nf index c7ca7d8c..2d264c51 100644 --- a/workflows/genomenote.nf +++ b/workflows/genomenote.nf @@ -14,8 +14,8 @@ def checkPathParamList = [ params.input, params.multiqc_config, params.lineage_d for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters -if (params.assembly && params.taxon_id && params.bioproject && params.biosample) { metadata_inputs = [ params.assembly, params.taxon_id, params.bioproject, params.biosample ] } -else { exit 1, 'Metadata input not specified. Please include an assembly accession, a taxon id, a bioproject accession and a biosample_accession' } +if (params.assembly && params.taxon_id && params.bioproject && params.biosample_wgs) { metadata_inputs = [ params.assembly, params.taxon_id, params.bioproject, params.biosample_wgs ] } +else { exit 1, 'Metadata input not specified. Please include an assembly accession, a taxon id, a bioproject accession and a biosample accession' } if (params.input) { ch_input = Channel.fromPath(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.fasta) { ch_fasta = Channel.fromPath(params.fasta) } else { exit 1, 'Genome fasta not specified!' } if (params.binsize) { ch_bin = Channel.of(params.binsize) } else { exit 1, 'Bin size for cooler/cload not specified!' 
} From e0e8c074bc6af3a45ce847312a44f57ea93dc0cb Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Tue, 30 Jul 2024 12:29:59 +0700 Subject: [PATCH 3/3] fix format error --- bin/combine_parsed_data.py | 2 +- modules/local/combine_metadata.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/combine_parsed_data.py b/bin/combine_parsed_data.py index 3179a7f8..1f2757ef 100755 --- a/bin/combine_parsed_data.py +++ b/bin/combine_parsed_data.py @@ -12,7 +12,7 @@ ("ENA_BIOPROJECT", "ena_bioproject_file"), ("ENA_BIOSAMPLE", "ena_biosample_wgs_file"), ("ENA_BIOSAMPLE_HIC", "ena_biosample_hic_file"), - ("ENA_BIOSAMPLE_RNA", "ena_biosample_rna_file"), + ("ENA_BIOSAMPLE_RNA", "ena_biosample_rna_file"), ("ENA_TAXONOMY", "ena_taxonomy_file"), ("NCBI_ASSEMBLY", "ncbi_assembly_file"), ("NCBI_TAXONOMY", "ncbi_taxonomy_file"), diff --git a/modules/local/combine_metadata.nf b/modules/local/combine_metadata.nf index 9011f0f2..69dbfcde 100644 --- a/modules/local/combine_metadata.nf +++ b/modules/local/combine_metadata.nf @@ -24,7 +24,7 @@ process COMBINE_METADATA { for (item in file_list){ def file = item def file_ext = item.getExtension() - def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}").toLowerCase() + "_file" + def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}").toLowerCase() + "_file" args.add(file_name) args.add(file) }