From a2446eb14b3e542e47fc1537a7a6cd8660cf8aac Mon Sep 17 00:00:00 2001
From: reichan1998 <baochauduong1311@gmail.com>
Date: Mon, 15 Jul 2024 01:44:09 +0700
Subject: [PATCH 1/3] Allow adding multiple biosamples

---
 bin/combine_parsed_data.py            | 22 +++++++++--
 bin/combine_statistics_data.py        |  6 ++-
 modules/local/combine_metadata.nf     | 16 +++++++-
 modules/local/parse_metadata.nf       |  2 +-
 modules/local/run_wget.nf             |  5 ++-
 subworkflows/local/genome_metadata.nf | 55 +++++++++++++++++++--------
 6 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/bin/combine_parsed_data.py b/bin/combine_parsed_data.py
index b30d4962..b99f9482 100755
--- a/bin/combine_parsed_data.py
+++ b/bin/combine_parsed_data.py
@@ -55,7 +55,10 @@ def process_file(file_in, params):
             if any(p in string.punctuation for p in value):
                 value = '"' + value + '"'
 
-            source_dict[key] = value
+            if key in source_dict:
+                source_dict[key].append(value)
+            else:
+                source_dict[key] = [value]
 
             if key in params:
                 params[key].append(value)
@@ -72,8 +75,18 @@ def main(args=None):
     params_inconsistent = {}
 
     for file in files:
-        (params, paramDict) = process_file(getattr(args, file[1]), params)
-        param_sets[file[0]] = paramDict
+        file_list = getattr(args, file[1])
+        if file_list:
+            for single_file in file_list.split(','):
+                (params, paramDict) = process_file(single_file, params)
+                if file[0] not in param_sets:
+                    param_sets[file[0]] = paramDict
+                else:
+                    for key, values in paramDict.items():
+                        if key in param_sets[file[0]]:
+                            param_sets[file[0]][key].extend(values)
+                        else:
+                            param_sets[file[0]][key] = values
 
     for key in params.keys():
         value_set = {v for v in params[key]}
@@ -82,7 +95,8 @@ def main(args=None):
 
             for source in param_sets:
                 if key in param_sets[source]:
-                    params_inconsistent[key].append((source, param_sets[source][key]))
+                    for val in param_sets[source][key]:
+                        params_inconsistent[key].append((source, val))
 
     # Strip inconsistent data from parameter list
     for i in params_inconsistent.keys():
diff --git a/bin/combine_statistics_data.py b/bin/combine_statistics_data.py
index 2e48c1dc..948f68cc 100755
--- a/bin/combine_statistics_data.py
+++ b/bin/combine_statistics_data.py
@@ -82,7 +82,11 @@ def process_inconsistent_file(file, params, inconsistent, consistent):
             else:
                 key = row.pop(0)
 
-                if consistent.get(key) is None:
+                if key in inconsistent:
+                    # Append to existing inconsistent values
+                    inconsistent[key].extend(row)
+                else:
+                    # Add new inconsistent values
                     inconsistent[key] = row
 
     return inconsistent
diff --git a/modules/local/combine_metadata.nf b/modules/local/combine_metadata.nf
index 1c3af9d4..ecada08a 100644
--- a/modules/local/combine_metadata.nf
+++ b/modules/local/combine_metadata.nf
@@ -21,12 +21,24 @@ process COMBINE_METADATA {
     script:
     def args = []
     def prefix = task.ext.prefix ?: meta.id
+    def biosample_files = []
+
     for (item in  file_list){
         def file = item
         def file_ext = item.getExtension()
         def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}") + "_file"
-        args.add(file_name)
-        args.add(file)
+        
+        if (file_name.contains("biosample")) {
+                biosample_files.add(file)
+        } else {
+            args.add(file_name)
+            args.add(file)
+        }
+    }
+    
+    if (!biosample_files.isEmpty()) {
+        args.add("--ena_biosample_file")
+        args.add(biosample_files.join(","))
     }
 
     """
diff --git a/modules/local/parse_metadata.nf b/modules/local/parse_metadata.nf
index d9f85ded..6383bc81 100644
--- a/modules/local/parse_metadata.nf
+++ b/modules/local/parse_metadata.nf
@@ -22,7 +22,7 @@ process PARSE_METADATA {
     script: // This script is bundled with the pipeline, in nf-core/genomenote/bin/
     def prefix = task.ext.prefix ?: meta.id
     def script_name = "parse_${meta.ext.toLowerCase()}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.py"
-    def output_file = "${prefix}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.csv"
+    def output_file = "${prefix}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}${meta.biosample}.csv"
     """
     $script_name \\
         $json \\
diff --git a/modules/local/run_wget.nf b/modules/local/run_wget.nf
index 8211f55a..cbd6c1f2 100644
--- a/modules/local/run_wget.nf
+++ b/modules/local/run_wget.nf
@@ -14,7 +14,7 @@ process RUN_WGET {
 
 
     output:
-    tuple val(meta), path("${meta.id}_${meta.source}_${meta.type}.${meta.ext}") , emit:  file_path
+    tuple val(meta), path("${meta.id}_${meta.source}_${meta.type}*.${meta.ext}") , emit:  file_path
     path "versions.yml"                         , emit: versions
 
     when:
@@ -22,8 +22,9 @@ process RUN_WGET {
 
     script:
     def  no_certificate = (meta.source == 'GOAT') ? '--no-check-certificate' : ''
+    def output = "${meta.id}_${meta.source}_${meta.type}${meta.biosample}.${meta.ext}".strip('_')
     """
-        wget ${no_certificate} -c -O ${meta.id}_${meta.source}_${meta.type}.${meta.ext} '${url}'
+        wget ${no_certificate} -c -O ${output} '${url}'
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
diff --git a/subworkflows/local/genome_metadata.nf b/subworkflows/local/genome_metadata.nf
index df68a782..69b8a924 100644
--- a/subworkflows/local/genome_metadata.nf
+++ b/subworkflows/local/genome_metadata.nf
@@ -15,37 +15,60 @@ workflow GENOME_METADATA {
 
     main:
     ch_versions = Channel.empty()
- 
+
     // Define channel for RUN_WGET
     ch_file_list
     | splitCsv(header: ['source', 'type', 'url', 'ext'], skip: 1)
-    | map { row -> 
-        [   
-            // meta
-            [   id: params.assembly,
-                taxon_id: params.taxon_id,
-                source: row.source, 
-                type: row.type, 
-                ext: row.ext, 
-            ],
-            // url 
-            row.url
+    | flatMap { row ->
+        // Create a list to hold the final entries
+        def entries = []
+
+        // Common metadata
+        def metadata = [
+            id: params.assembly,
+            taxon_id: params.taxon_id,
+            source: row.source,
+            type: row.type,
+            ext: row.ext
+        ]
+
+        // Process each biosample
+        params.biosample.split(',').each { biosample ->
+            def url = row.url
                 .replaceAll(/ASSEMBLY_ACCESSION/, params.assembly)
                 .replaceAll(/TAXONOMY_ID/, params.taxon_id)
                 .replaceAll(/BIOPROJECT_ACCESSION/, params.bioproject)
-                .replaceAll(/BIOSAMPLE_ACCESSION/, params.biosample)
-        ]
+                .replaceAll(/BIOSAMPLE_ACCESSION/, biosample.trim())
+
+            if (row.type == 'Biosample') {
+                // Add entry with biosample in metadata for Biosample type
+                entries << [
+                    metadata + [biosample: biosample.trim()],
+                    url
+                ]
+            } else {
+                // Add entry without biosample in metadata for other types
+                entries << [
+                    metadata + [biosample: ''],
+                    url
+                ]
+            }
+        }
+
+        return entries
     }
+    | unique()
     | set { file_list }
+    file_list.view()
 
     // Fetch files
     RUN_WGET ( file_list )
-    ch_versions = ch_versions.mix( RUN_WGET.out.versions.first() ) 
+    ch_versions = ch_versions.mix( RUN_WGET.out.versions.first() )
 
     PARSE_METADATA(RUN_WGET.out.file_path)
     ch_versions = ch_versions.mix( PARSE_METADATA.out.versions.first() )
     
-    PARSE_METADATA.out.file_path 
+    PARSE_METADATA.out.file_path
     | map { it -> tuple( it[1] )}
     | collect  
     | map { it ->

From bebcec70d0a3fd5c29d20a07b196b708e8515e12 Mon Sep 17 00:00:00 2001
From: reichan1998 <baochauduong1311@gmail.com>
Date: Tue, 30 Jul 2024 11:44:11 +0700
Subject: [PATCH 2/3] Rename params.biosample to biosample_wgs, add
 params.biosample_rna and params.biosample_hic

---
 bin/combine_parsed_data.py            | 30 ++++++-----------
 bin/combine_statistics_data.py        |  6 +---
 bin/parse_xml_ena_biosample.py        | 19 ++++++++---
 conf/test.config                      |  4 ++-
 conf/test_full.config                 |  4 ++-
 docs/usage.md                         |  8 +++--
 modules/local/combine_metadata.nf     | 18 ++--------
 modules/local/parse_metadata.nf       |  3 +-
 modules/local/run_wget.nf             |  3 +-
 nextflow.config                       |  4 ++-
 nextflow_schema.json                  | 12 +++++--
 subworkflows/local/genome_metadata.nf | 48 ++++++++++++++++-----------
 workflows/genomenote.nf               |  4 +--
 13 files changed, 86 insertions(+), 77 deletions(-)

diff --git a/bin/combine_parsed_data.py b/bin/combine_parsed_data.py
index b99f9482..3179a7f8 100755
--- a/bin/combine_parsed_data.py
+++ b/bin/combine_parsed_data.py
@@ -10,7 +10,9 @@
 files = [
     ("ENA_ASSEMBLY", "ena_assembly_file"),
     ("ENA_BIOPROJECT", "ena_bioproject_file"),
-    ("ENA_BIOSAMPLE", "ena_biosample_file"),
+    ("ENA_BIOSAMPLE", "ena_biosample_wgs_file"),
+    ("ENA_BIOSAMPLE_HIC", "ena_biosample_hic_file"),
+    ("ENA_BIOSAMPLE_RNA", "ena_biosample_rna_file"), 
     ("ENA_TAXONOMY", "ena_taxonomy_file"),
     ("NCBI_ASSEMBLY", "ncbi_assembly_file"),
     ("NCBI_TAXONOMY", "ncbi_taxonomy_file"),
@@ -25,7 +27,9 @@ def parse_args(args=None):
     parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
     parser.add_argument("--ena_assembly_file", help="Input parsed ENA assembly file.", required=False)
     parser.add_argument("--ena_bioproject_file", help="Input parsed ENA assembly file.", required=False)
-    parser.add_argument("--ena_biosample_file", help="Input parsed ENA assembly file.", required=False)
+    parser.add_argument("--ena_biosample_wgs_file", help="Input parsed ENA genomic biosample file.", required=False)
+    parser.add_argument("--ena_biosample_hic_file", help="Input parsed ENA HiC biosample file.", required=False)
+    parser.add_argument("--ena_biosample_rna_file", help="Input parsed ENA RNASeq biosample file.", required=False)
     parser.add_argument("--ena_taxonomy_file", help="Input parsed ENA assembly file.", required=False)
     parser.add_argument("--ncbi_assembly_file", help="Input parsed ENA assembly file.", required=False)
     parser.add_argument("--ncbi_taxonomy_file", help="Input parsed ENA assembly file.", required=False)
@@ -55,10 +59,7 @@ def process_file(file_in, params):
             if any(p in string.punctuation for p in value):
                 value = '"' + value + '"'
 
-            if key in source_dict:
-                source_dict[key].append(value)
-            else:
-                source_dict[key] = [value]
+            source_dict[key] = value
 
             if key in params:
                 params[key].append(value)
@@ -75,18 +76,8 @@ def main(args=None):
     params_inconsistent = {}
 
     for file in files:
-        file_list = getattr(args, file[1])
-        if file_list:
-            for single_file in file_list.split(','):
-                (params, paramDict) = process_file(single_file, params)
-                if file[0] not in param_sets:
-                    param_sets[file[0]] = paramDict
-                else:
-                    for key, values in paramDict.items():
-                        if key in param_sets[file[0]]:
-                            param_sets[file[0]][key].extend(values)
-                        else:
-                            param_sets[file[0]][key] = values
+        (params, paramDict) = process_file(getattr(args, file[1]), params)
+        param_sets[file[0]] = paramDict
 
     for key in params.keys():
         value_set = {v for v in params[key]}
@@ -95,8 +86,7 @@ def main(args=None):
 
             for source in param_sets:
                 if key in param_sets[source]:
-                    for val in param_sets[source][key]:
-                        params_inconsistent[key].append((source, val))
+                    params_inconsistent[key].append((source, param_sets[source][key]))
 
     # Strip inconsistent data from parameter list
     for i in params_inconsistent.keys():
diff --git a/bin/combine_statistics_data.py b/bin/combine_statistics_data.py
index 948f68cc..2e48c1dc 100755
--- a/bin/combine_statistics_data.py
+++ b/bin/combine_statistics_data.py
@@ -82,11 +82,7 @@ def process_inconsistent_file(file, params, inconsistent, consistent):
             else:
                 key = row.pop(0)
 
-                if key in inconsistent:
-                    # Append to existing inconsistent values
-                    inconsistent[key].extend(row)
-                else:
-                    # Add new inconsistent values
+                if consistent.get(key) is None:
                     inconsistent[key] = row
 
     return inconsistent
diff --git a/bin/parse_xml_ena_biosample.py b/bin/parse_xml_ena_biosample.py
index bd0390c5..2c55c924 100755
--- a/bin/parse_xml_ena_biosample.py
+++ b/bin/parse_xml_ena_biosample.py
@@ -43,7 +43,7 @@
 
 
 def parse_args(args=None):
-    Description = "Parse contents of an ENA SAMPLE report and pul out meta data required by a genome note."
+    Description = "Parse contents of an ENA SAMPLE report and pull out meta data required by a genome note."
     Epilog = "Example usage: python parse_xml_ena_sample.py <FILE_IN> <FILE_OUT>"
 
     parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
@@ -77,6 +77,13 @@ def parse_xml(file_in, file_out):
     root = tree.getroot()
     param_list = []
 
+    # Extract biosample type from FILE_OUT
+    biosample_type = None
+    if "HIC" in file_out.upper():
+        biosample_type = "HIC"
+    elif "RNA" in file_out.upper():
+        biosample_type = "RNA"
+
     for f in fetch:
         param = None
         r = root
@@ -101,8 +108,7 @@ def parse_xml(file_in, file_out):
                             param = r.attrib.get(f[2][1])
                         except ValueError:
                             param = None
-
-                    ## Count child elements with specfic tag
+                    ## Count child elements with specific tag
                     if f[2][0] == "count":
                         if r is not None:
                             param = str(len(r.findall(f[2][1]))) if len(r.findall(f[2][1])) != 0 else None
@@ -129,8 +135,11 @@ def parse_xml(file_in, file_out):
 
                     if any(p in string.punctuation for p in param):
                         param = '"' + param + '"'
-
-                    param_list.append([f[0], param])
+                    # Prefix parameter name if biosample type is HiC or RNA
+                    param_name = f[0]
+                    if biosample_type in ["HIC", "RNA"]:
+                        param_name = f"{biosample_type}_{param_name}"
+                    param_list.append([param_name, param])
 
     if len(param_list) > 0:
         out_dir = os.path.dirname(file_out)
diff --git a/conf/test.config b/conf/test.config
index 93b044d3..aef2428f 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -33,7 +33,9 @@ params {
     species  = 'Cloeon_dipterum'
     taxon_id = '197152'
     bioproject = 'PRJEB45177'
-    biosample =  'SAMEA7520803'
+    biosample_wgs = 'SAMEA7520803'
+    biosample_hic = 'SAMEA7520846'
+    biosample_rna = 'SAMEA7521081'
 
     // Genome Notes Portal
     write_to_portal  = false
diff --git a/conf/test_full.config b/conf/test_full.config
index 99f7c747..6f151b91 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -33,7 +33,9 @@ params {
     species = 'Ypsolopha_sequella'
     taxon_id = '1870436'
     bioproject = 'PRJEB50740'
-    biosample =  'SAMEA7519929'
+    biosample_wgs =  'SAMEA7519929'
+    biosample_hic = 'SAMEA7519968'
+    biosample_rna = null
 
     // Genome Notes Portal
     write_to_portal  = false
diff --git a/docs/usage.md b/docs/usage.md
index d2cc74d4..295ac70a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -25,7 +25,9 @@ You will need to supply the assembly accession for the genome you would like to
    --species '[species name]'
    --taxon_id '[taxon id]'
    --bioproject '[bioproject accession]'
-   --biosample '[biosample accession]'
+   --biosample_wgs '[biosample accession of the biosample used to produce the genomic sequence]'
+   --biosample_hic '[biosample accession of the biosample used to produce the HiC data]'
+   --biosample_rna '[biosample accession of the biosample used to produce the RNASeq data]'
 ```
 
 If you wish to run the optional step that writes genome metatdata key value-pairs to a genome notes databases you will need to set the parameter "write_to_portal" to true and provide the base url for the REST API that writes to the database.
@@ -101,7 +103,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p
 The typical command for running the pipeline is as follows:
 
 ```bash
-nextflow run sanger-tol/genomenote --input samplesheet.csv --outdir <OUTDIR> --fasta genome.fasta --assembly GCA_922984935.2 --species Epithemia_sp._CRS-2021b --taxon_id 2809013 --bioproject PRJEB49353 --biosample SAMEA7524400 -profile docker
+nextflow run sanger-tol/genomenote --input samplesheet.csv --outdir <OUTDIR> --fasta genome.fasta --assembly GCA_922984935.2 --species Epithemia_sp._CRS-2021b --taxon_id 2809013 --bioproject PRJEB49353 --biosample_wgs SAMEA7524400 -profile docker
 ```
 
 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
@@ -137,7 +139,7 @@ assembly: 'GCA_922984935.2'
 species: 'Epithemia_sp._CRS-2021b'
 taxon_id: '2809013'
 bioproject: 'PRJEB49353'
-biosample: 'SAMEA7524400'
+biosample_wgs: 'SAMEA7524400'
 <...>
 ```
 
diff --git a/modules/local/combine_metadata.nf b/modules/local/combine_metadata.nf
index ecada08a..9011f0f2 100644
--- a/modules/local/combine_metadata.nf
+++ b/modules/local/combine_metadata.nf
@@ -21,24 +21,12 @@ process COMBINE_METADATA {
     script:
     def args = []
     def prefix = task.ext.prefix ?: meta.id
-    def biosample_files = []
-
     for (item in  file_list){
         def file = item
         def file_ext = item.getExtension()
-        def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}") + "_file"
-        
-        if (file_name.contains("biosample")) {
-                biosample_files.add(file)
-        } else {
-            args.add(file_name)
-            args.add(file)
-        }
-    }
-    
-    if (!biosample_files.isEmpty()) {
-        args.add("--ena_biosample_file")
-        args.add(biosample_files.join(","))
+        def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}").toLowerCase() + "_file" 
+        args.add(file_name)
+        args.add(file)
     }
 
     """
diff --git a/modules/local/parse_metadata.nf b/modules/local/parse_metadata.nf
index 6383bc81..0f895a86 100644
--- a/modules/local/parse_metadata.nf
+++ b/modules/local/parse_metadata.nf
@@ -22,7 +22,8 @@ process PARSE_METADATA {
     script: // This script is bundled with the pipeline, in nf-core/genomenote/bin/
     def prefix = task.ext.prefix ?: meta.id
     def script_name = "parse_${meta.ext.toLowerCase()}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.py"
-    def output_file = "${prefix}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}${meta.biosample}.csv"
+    def is_biosample = (meta.biosample_type == "WGS" || meta.biosample_type == "HIC" || meta.biosample_type == "RNA") ? "_${meta.biosample_type}" : ""
+    def output_file = "${prefix}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}${is_biosample}.csv".strip('_')
     """
     $script_name \\
         $json \\
diff --git a/modules/local/run_wget.nf b/modules/local/run_wget.nf
index cbd6c1f2..6ae86b9f 100644
--- a/modules/local/run_wget.nf
+++ b/modules/local/run_wget.nf
@@ -22,7 +22,8 @@ process RUN_WGET {
 
     script:
     def  no_certificate = (meta.source == 'GOAT') ? '--no-check-certificate' : ''
-    def output = "${meta.id}_${meta.source}_${meta.type}${meta.biosample}.${meta.ext}".strip('_')
+    def is_biosample = (meta.biosample_type == "WGS" || meta.biosample_type == "HIC" || meta.biosample_type == "RNA") ? "_${meta.biosample_type}" : ""
+    def output = "${meta.id}_${meta.source}_${meta.type}${is_biosample}.${meta.ext}".strip('_')
     """
         wget ${no_certificate} -c -O ${output} '${url}'
 
diff --git a/nextflow.config b/nextflow.config
index ae7dc43d..c0e9ee66 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -19,7 +19,9 @@ params {
     species                         = null
     taxon_id                        = null
     bioproject                      = null
-    biosample                       = null
+    biosample_wgs                   = null
+    biosample_hic                   = null
+    biosample_rna                   = null
 
     // Genome Notes 
     write_to_portal                 = false     
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 3fd4962b..9525f1d8 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -48,9 +48,17 @@
                     "type": "string",
                     "description": "The bioproject accesion linked to the genome assembly, for example: PRJEB49353."
                 },
-                "biosample": {
+                "biosample_wgs": {
                     "type": "string",
-                    "description": "The biosample accesion(s) linked to the samples in the experiment, for example: SAMEA7524400."
+                    "description": "The biosample accession(s) linked to the WGS samples in the experiment, for example: SAMEA7520803."
+                },
+                "biosample_rna": {
+                    "type": "string",
+                    "description": "The biosample accession(s) linked to the RNA samples in the experiment, for example: SAMEA7521081."
+                },
+                "biosample_hic": {
+                    "type": "string",
+                    "description": "The biosample accession(s) linked to the Hi-C samples in the experiment, for example: SAMEA7520846."
                 },
                 "outdir": {
                     "type": "string",
diff --git a/subworkflows/local/genome_metadata.nf b/subworkflows/local/genome_metadata.nf
index 69b8a924..673a99ab 100644
--- a/subworkflows/local/genome_metadata.nf
+++ b/subworkflows/local/genome_metadata.nf
@@ -32,34 +32,42 @@ workflow GENOME_METADATA {
             ext: row.ext
         ]
 
+        // Define biosamples with their types
+        def biosamples = [
+            ["WGS", params.biosample_wgs],
+            ["HIC", params.biosample_hic],
+            ["RNA", params.biosample_rna]
+        ]
+
         // Process each biosample
-        params.biosample.split(',').each { biosample ->
-            def url = row.url
-                .replaceAll(/ASSEMBLY_ACCESSION/, params.assembly)
-                .replaceAll(/TAXONOMY_ID/, params.taxon_id)
-                .replaceAll(/BIOPROJECT_ACCESSION/, params.bioproject)
-                .replaceAll(/BIOSAMPLE_ACCESSION/, biosample.trim())
+        biosamples.each { biosampleType, biosampleID ->
+            if ( biosampleID != null ) {
+                // Only build entries for biosample types that were actually provided
+                def url = row.url
+                    .replaceAll(/ASSEMBLY_ACCESSION/, params.assembly)
+                    .replaceAll(/TAXONOMY_ID/, params.taxon_id)
+                    .replaceAll(/BIOPROJECT_ACCESSION/, params.bioproject)
+                    .replaceAll(/BIOSAMPLE_ACCESSION/, biosampleID)
 
-            if (row.type == 'Biosample') {
-                // Add entry with biosample in metadata for Biosample type
-                entries << [
-                    metadata + [biosample: biosample.trim()],
-                    url
-                ]
-            } else {
-                // Add entry without biosample in metadata for other types
-                entries << [
-                    metadata + [biosample: ''],
-                    url
-                ]
+                if (row.type == 'Biosample') {
+                    // Add entry with biosample type in metadata for Biosample type
+                    entries << [
+                        metadata + [biosample_type: biosampleType],
+                        url
+                    ]
+                } else {
+                    // Add entry without biosample type in metadata for other types
+                    entries << [
+                        metadata + [biosample_type: ''],
+                        url
+                    ]
+                }
             }
         }
-
         return entries
     }
     | unique()
     | set { file_list }
-    file_list.view()
 
     // Fetch files
     RUN_WGET ( file_list )
diff --git a/workflows/genomenote.nf b/workflows/genomenote.nf
index c7ca7d8c..2d264c51 100644
--- a/workflows/genomenote.nf
+++ b/workflows/genomenote.nf
@@ -14,8 +14,8 @@ def checkPathParamList = [ params.input, params.multiqc_config, params.lineage_d
 for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
 
 // Check mandatory parameters
-if (params.assembly && params.taxon_id && params.bioproject && params.biosample) { metadata_inputs = [ params.assembly, params.taxon_id, params.bioproject, params.biosample ] }
-else { exit 1, 'Metadata input not specified. Please include an assembly accession, a taxon id, a bioproject accession and a biosample_accession' }
+if (params.assembly && params.taxon_id && params.bioproject && params.biosample_wgs) { metadata_inputs = [ params.assembly, params.taxon_id, params.bioproject, params.biosample_wgs ] }
+else { exit 1, 'Metadata input not specified. Please include an assembly accession, a taxon id, a bioproject accession and a biosample accession' }
 if (params.input)     { ch_input = Channel.fromPath(params.input) } else { exit 1, 'Input samplesheet not specified!' }
 if (params.fasta)     { ch_fasta = Channel.fromPath(params.fasta) } else { exit 1, 'Genome fasta not specified!' }
 if (params.binsize)   { ch_bin   = Channel.of(params.binsize)     } else { exit 1, 'Bin size for cooler/cload not specified!' }

From e0e8c074bc6af3a45ce847312a44f57ea93dc0cb Mon Sep 17 00:00:00 2001
From: reichan1998 <baochauduong1311@gmail.com>
Date: Tue, 30 Jul 2024 12:29:59 +0700
Subject: [PATCH 3/3] fix format error

---
 bin/combine_parsed_data.py        | 2 +-
 modules/local/combine_metadata.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/combine_parsed_data.py b/bin/combine_parsed_data.py
index 3179a7f8..1f2757ef 100755
--- a/bin/combine_parsed_data.py
+++ b/bin/combine_parsed_data.py
@@ -12,7 +12,7 @@
     ("ENA_BIOPROJECT", "ena_bioproject_file"),
     ("ENA_BIOSAMPLE", "ena_biosample_wgs_file"),
     ("ENA_BIOSAMPLE_HIC", "ena_biosample_hic_file"),
-    ("ENA_BIOSAMPLE_RNA", "ena_biosample_rna_file"), 
+    ("ENA_BIOSAMPLE_RNA", "ena_biosample_rna_file"),
     ("ENA_TAXONOMY", "ena_taxonomy_file"),
     ("NCBI_ASSEMBLY", "ncbi_assembly_file"),
     ("NCBI_TAXONOMY", "ncbi_taxonomy_file"),
diff --git a/modules/local/combine_metadata.nf b/modules/local/combine_metadata.nf
index 9011f0f2..69dbfcde 100644
--- a/modules/local/combine_metadata.nf
+++ b/modules/local/combine_metadata.nf
@@ -24,7 +24,7 @@ process COMBINE_METADATA {
     for (item in  file_list){
         def file = item
         def file_ext = item.getExtension()
-        def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}").toLowerCase() + "_file" 
+        def file_name = "--" + item.getName().minus("${prefix}_").minus(".${file_ext}").toLowerCase() + "_file"
         args.add(file_name)
         args.add(file)
     }