Skip to content

Commit

Permalink
Merge pull request sanger-tol#110 from sanger-tol/xml_template
Browse files Browse the repository at this point in the history
Allow population of either an XML or docx genome note template
  • Loading branch information
BethYates authored Mar 27, 2024
2 parents 659026f + ac7b5ea commit 12bff64
Show file tree
Hide file tree
Showing 12 changed files with 105 additions and 41 deletions.
1 change: 1 addition & 0 deletions .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ lint:
multiqc_config:
- report_comment
actions_ci: false
template_strings: False
34 changes: 34 additions & 0 deletions assets/genome_note_template.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article>
<article>
<body>
<sec>
<title>Species taxonomy</title>
<p>{{ TAX_STRING }};
<italic>{{ GENUS }}</italic>;
<italic>{{ GENUS_SPECIES }}</italic> ($TAXONOMY_AUTHORITY) (NCBI:txid{{ NCBI_TAXID }}) {{ TEST_NOT_REPLACED }}.
</p>
</sec>
<sec>
<table>
<thead>
<tr>
<th align="center" valign="top">INSDC accession</th>
<th align="center" valign="top">Chromosome</th>
<th align="center" valign="top">Length (Mb)</th>
<th align="center" valign="top">GC%</th>
</tr>
</thead>
<tbody>
{% for chromosome in CHR_TABLE %}
<tr>
<td align="left" valign="top">{{ chromosome.get('Accession') }}</td>
<td align="center" valign="top">{{ chromosome.get('Chromosome') }}</td>
<td align="center" valign="top">{{ chromosome.get('Length') }}</td>
<td align="center" valign="top">{{ chromosome.get('GC') }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</sec>
</body>
</article>
2 changes: 1 addition & 1 deletion assets/samplesheet.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
sample,datatype,datafile
ieCloDipt1,pacbio,https://tolit.cog.sanger.ac.uk/test-data/Cloeon_dipterum/genomic_data/ieCloDipt1/pacbio/m64094_200901_101741.ccs.bc1001_BAK8A_OA--bc1001_BAK8A_OA.subsampled.bam
ieCloDipt1,hic,https://tolit.cog.sanger.ac.uk/test-data/Cloeon_dipterum/analysis/ieCloDipt1.1/read_mapping/hic/GCA_949628265.1.unmasked.hic.ieCloDipt1.subsampled.cram
ieCloDipt1,hic,https://tolit.cog.sanger.ac.uk/test-data/Cloeon_dipterum/analysis/ieCloDipt1.1/read_mapping/hic/GCA_949628265.1.unmasked.hic.ieCloDipt1.subsampled.cram
30 changes: 21 additions & 9 deletions bin/populate_genome_note_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ def parse_args(args=None):

parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
parser.add_argument("PARAM_FILE", help="Input parameters CSV file.")
parser.add_argument("TEMPLATE_FILE", help="Input Genome Note Template Doc file.")
parser.add_argument("TEMPLATE_FILE", help="Input Genome Note Template file.")
parser.add_argument("TEMPLATE_TYPE", help="Input Genome Note Template file type.")
parser.add_argument("FILE_OUT", help="Output file.")
parser.add_argument("--version", action="version", version="%(prog)s 1.0")
parser.add_argument("--version", action="version", version="%(prog)s 1.1")
return parser.parse_args(args)


Expand All @@ -26,10 +27,14 @@ def make_dir(path):
os.makedirs(path, exist_ok=True)


def write_file(template, file_out):
def write_file(template, type, file_out):
out_dir = os.path.dirname(file_out)
make_dir(out_dir)
template.save(os.path.join(out_dir, file_out))
if type == "docx":
template.save(os.path.join(out_dir, file_out))
else:
with open(file_out, "w") as fout:
fout.write(template)


def build_param_list(param_file):
Expand Down Expand Up @@ -82,18 +87,25 @@ def build_param_list(param_file):
return mydict


def populate_template(param_file, template_file, file_out):
def populate_template(param_file, template_file, template_type, file_out):
myenv = jinja2.Environment(undefined=jinja2.DebugUndefined)
context = build_param_list(param_file)
template = DocxTemplate(template_file)
template.render(context, myenv)
if template_type == "docx":
template = DocxTemplate(template_file)
template.render(context, myenv)
write_file(template, template_type, file_out)
else:
with open(template_file, "r") as file:
data = file.read()

write_file(template, file_out)
template = myenv.from_string(data)
content = template.render(context)
write_file(content, template_type, file_out)


def main(args=None):
args = parse_args(args)
populate_template(args.PARAM_FILE, args.TEMPLATE_FILE, args.FILE_OUT)
populate_template(args.PARAM_FILE, args.TEMPLATE_FILE, args.TEMPLATE_TYPE, args.FILE_OUT)


if __name__ == "__main__":
Expand Down
27 changes: 0 additions & 27 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,6 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Increasing the number of CPUs often gives diminishing returns, so we increase it
following a logarithm curve. Example:
- 0 < value <= 1: start + step
- 1 < value <= 2: start + 2*step
- 2 < value <= 4: start + 3*step
- 4 < value <= 8: start + 4*step
In order to support re-runs, the step increase may be multiplied by the attempt
number prior to calling this function.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Modified logarithm function that doesn't return negative numbers
def positive_log(value, base) {
if (value <= 1) {
return 0
} else {
return Math.log(value)/Math.log(base)
}
}

def log_increase_cpus(start, step, value, base) {
return check_max(start + step * (1 + Math.ceil(positive_log(value, base))), 'cpus')
}


process {

errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
Expand Down
1 change: 1 addition & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ params {
// Genome Notes Portal
write_to_portal = false
genome_notes_api = "https://notes-staging.tol.sanger.ac.uk/api/v1"
note_template = "${projectDir}/assets/genome_note_template.xml"

// HiGlass Options
upload_higlass_data = false
Expand Down
2 changes: 2 additions & 0 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ params {
// Genome Notes Portal
write_to_portal = false
genome_notes_api = "https://notes-staging.tol.sanger.ac.uk/api/v1"
note_template = "${projectDir}/assets/genome_note_template.docx"


// HiGlass Options
upload_higlass_data = true
Expand Down
6 changes: 6 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ If you wish to run the optional step that writes genome metadata key value-pair
--genome_notes_api '[URL for Genome Notes Portal API]'
```

If you want to populate a genome note template file with the key-value pairs generated by this pipeline, you will need to pass the path to the template file as the "note_template" parameter. Templates may be in either docx or xml format.

```bash
--note_template '[path to genome note template file (docx or xml)]'
```

You will also need to set a nextflow secret to store the API key belonging to your user.

```bash
Expand Down
6 changes: 4 additions & 2 deletions modules/local/populate_template.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,22 @@ process POPULATE_TEMPLATE {
path(note_template)

output:
tuple val(meta), path("*.docx"), emit: genome_note
tuple val(meta), path("*.{docx,xml}"), emit: genome_note
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def prefix = task.ext.prefix ?: meta.id
def file_type = note_template.extension

"""
populate_genome_note_template.py \\
$param_data \\
$note_template \\
${prefix}.docx
${file_type} \\
${prefix}.${file_type}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
27 changes: 27 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ params {
// Genome Notes
write_to_portal = false
genome_notes_api = null
note_template = null

// HiGlass options
upload_higlass_data = false
Expand Down Expand Up @@ -280,3 +281,29 @@ def check_max(obj, type) {
}
}
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Increasing the number of CPUs often gives diminishing returns, so we increase it
following a logarithm curve. Example:
- 0 < value <= 1: start + step
- 1 < value <= 2: start + 2*step
- 2 < value <= 4: start + 3*step
- 4 < value <= 8: start + 4*step
In order to support re-runs, the step increase may be multiplied by the attempt
number prior to calling this function.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Modified logarithm function that doesn't return negative numbers
def positive_log(value, base) {
if (value <= 1) {
return 0
} else {
return Math.log(value)/Math.log(base)
}
}

def log_increase_cpus(start, step, value, base) {
return check_max(start + step * (1 + Math.ceil(positive_log(value, base))), 'cpus')
}
7 changes: 7 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@
"description": "URL for Genome Notes Portal API.",
"fa_icon": "far fa-file-code"
},
"note_template": {
"type": "string",
"format": "file-path",
"description": "The path to a genome note template file.",
"help_text": "Set this parameter if you have a genome note template file that you wish to populate. Templates may be docx or xml files.",
"fa_icon": "fas fa-folder-open"
},
"upload_higlass_data": {
"type": "boolean",
"description": "flag to control if Higlass server should be updated to add new files",
Expand Down
3 changes: 1 addition & 2 deletions workflows/genomenote.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if (params.lineage_tax_ids) { ch_lineage_tax_ids = Channel.fromPath(params.linea

// Check optional parameters
if (params.lineage_db) { ch_lineage_db = Channel.fromPath(params.lineage_db) } else { ch_lineage_db = Channel.empty() }

if (params.note_template) { ch_note_template = Channel.fromPath(params.note_template) } else { ch_note_template = Channel.empty() }

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -35,7 +35,6 @@ if (params.lineage_db) { ch_lineage_db = Channel.fromPath(params.lineage_db) } e

ch_metdata_input = Channel.of( metadata_inputs )
ch_file_list = Channel.fromPath("$projectDir/assets/genome_metadata_template.csv")
ch_note_template = Channel.fromPath("$projectDir/assets/genome_note_template.docx")
ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
Expand Down

0 comments on commit 12bff64

Please sign in to comment.