Skip to content

Commit

Permalink
add dorado modifications
Browse files Browse the repository at this point in the history
  • Loading branch information
yuukiiwa committed Sep 19, 2024
1 parent dd82756 commit 3d4521a
Show file tree
Hide file tree
Showing 11 changed files with 179 additions and 85 deletions.
2 changes: 1 addition & 1 deletion bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def check_samplesheet(file_in, updated_path, file_out):
input_file = "/".join([updated_path, input_file.split("/")[-1]])
list_dir = os.listdir(input_file)
fast5 = input_file
if not (all(fname.endswith(".fast5") for fname in list_dir)):
if not (all(fname.endswith(".fast5") for fname in list_dir)) and not (all(fname.endswith(".pod5") for fname in list_dir)):
if "fast5" in list_dir and "fastq" in list_dir:
fast5 = input_file + "/fast5"
## CHECK FAST5 DIRECTORY
Expand Down
2 changes: 1 addition & 1 deletion conf/test_bc_nodx.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ params {
max_time = 12.h

// Input data to perform basecalling and to skip demultipexing
input = 'https://raw.githubusercontent.com/yuukiiwa/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_nodx.csv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_nodx.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
protocol = 'cDNA'
flowcell = 'FLO-MIN106'
Expand Down
36 changes: 36 additions & 0 deletions conf/test_bc_nodx_dnamod.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/nanoseq -profile test_bc_nodx,<docker/singularity>
*/

params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on Travis
max_cpus = 2
max_memory = 6.GB
max_time = 12.h

// Input data to perform basecalling and to skip demultipexing
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/nanoseq/3.2/samplesheet/samplesheet_bc_nodx_dnamod.csv'
input_path_file_type= 'pod5'
bedmethyl_out = true
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
protocol = 'cDNA'
flowcell = 'FLO-MIN106'
kit = 'SQK-DCS108'
dorado_model = '[email protected]'
dorado_modification = '5mCG_5hmCG'
dorado_device = 'cpu'
skip_bigbed = true
skip_bigwig = true
skip_demultiplexing = true
skip_quantification = true
skip_fusion_analysis= true
skip_modification_analysis=true
}
4 changes: 2 additions & 2 deletions modules/local/bambu.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ process BAMBU {

conda "conda-forge::r-base=4.0.3 bioconda::bioconductor-bambu=3.0.8 bioconda::bioconductor-bsgenome=1.66.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-bambu:3.2.4--r43hf17093f_0' :
'quay.io/biocontainers/bioconductor-bambu:3.2.4--r43hf17093f_0' }"
'https://depot.galaxyproject.org/singularity/bioconductor-bambu:3.4.0--r43hf17093f_0' :
'quay.io/biocontainers/bioconductor-bambu:3.4.0--r43hf17093f_0' }"

input:
tuple path(fasta), path(gtf)
Expand Down
25 changes: 25 additions & 0 deletions modules/local/dorado_aligner.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
process DORADO_ALIGNER {
tag "$meta.id"
label 'process_medium'

container "docker.io/ontresearch/dorado"

input:
tuple val(meta), path(mod_bam)
path fasta

output:
tuple val(meta), path("aligned_sorted.bam"), path("*.bai") , emit: aligned_bam
path "versions.yml" , emit: versions

script:
"""
dorado aligner --mm2-preset map-ont $fasta $mod_bam > aligned.bam && samtools sort aligned.bam -o aligned_sorted.bam && samtools index aligned_sorted.bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dorado: \$(echo \$(dorado --version 2>&1) | sed -r 's/.{81}//')
END_VERSIONS
"""
}

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process DORADO {
process DORADO_BASECALLER {
tag "$meta.id"
label 'process_medium'

Expand All @@ -10,20 +10,19 @@ process DORADO {
val dorado_model

output:
tuple val(meta), path("*.fastq.gz") , emit: fastq
tuple val(meta), path("basecall*") , emit: dorado_out
path "versions.yml" , emit: versions

script:
def emit_args = (params.dorado_modification == null) ? " --emit-fastq > basecall.fastq && gzip basecall.fastq" : " --modified-bases $params.dorado_modification > basecall.bam"
"""
dorado download --model $dorado_model
dorado basecaller $dorado_model $pod5_path --device $dorado_device --emit-fastq > basecall.fastq
dorado basecaller $dorado_model $pod5_path --device $dorado_device $emit_args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dorado: \$(echo \$(dorado --version 2>&1) | sed -r 's/.{81}//')
END_VERSIONS
gzip basecall.fastq
"""
}

1 change: 1 addition & 0 deletions modules/local/get_test_data.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ process GET_TEST_DATA {
output:
path "test-datasets/fast5/$barcoded/" , emit: ch_input_fast5_dir_path
path "test-datasets/modification_fast5_fastq/", emit: ch_input_dir_path
path "test-datasets/pod5/" , emit: ch_pod5_dir_path
path "versions.yml" , emit: versions

when:
Expand Down
27 changes: 0 additions & 27 deletions modules/local/modkit_bedmethyl.nf

This file was deleted.

29 changes: 29 additions & 0 deletions modules/local/modkit_pileup.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
process MODKIT_PILEUP {
tag "$meta.id"
label 'process_high_memory'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ont-modkit:0.4.0--h5c23e0d_0' :
'quay.io/biocontainers/ont-modkit:0.4.0--h5c23e0d_0' }"

input:
tuple val(meta), path(aligned_mod_bam), path(bai)

output:
tuple val(meta), path(bedmethyl) , emit: bedmethyl
path "versions.yml" , emit: versions

script:
bedmethyl = "$meta.id" +".bed"
"""
modkit pileup $aligned_mod_bam $bedmethyl --threads $task.cpus
cat <<-END_VERSIONS > versions.yml
"${task.process}":
modkit: \$(echo \$(modkit --version 2>&1) | sed -r 's/.{81}//')
END_VERSIONS
gzip basecall.fastq
"""
}

4 changes: 4 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@ params {

// Options: Basecalling and Demultiplexing
input_path = null
input_path_file_type = 'fast5'
bedmethyl_out = false
flowcell = null
kit = null
barcode_kit = null
barcode_both_ends = false
trim_barcodes = false
dorado_model = null
doraro_modification = null
dorado_device = 'cuda:all'
qcat_min_score = 60
qcat_detect_middle = false
Expand Down Expand Up @@ -227,6 +230,7 @@ profiles {
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
test_bc_nodx { includeConfig 'conf/test_bc_nodx.config' }
test_bc_nodx_dnamod { includeConfig 'conf/test_bc_nodx_dnamod.config' }
test_nobc_dx { includeConfig 'conf/test_nobc_dx.config' }
test_nobc_nodx_stringtie { includeConfig 'conf/test_nobc_nodx_stringtie.config' }
test_nobc_nodx_noaln { includeConfig 'conf/test_nobc_nodx_noaln.config' }
Expand Down
125 changes: 76 additions & 49 deletions workflows/nanoseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
include { GET_TEST_DATA } from '../modules/local/get_test_data'
include { GET_NANOLYSE_FASTA } from '../modules/local/get_nanolyse_fasta'
include { FAST5_TO_POD5 } from '../modules/local/fast5_to_pod5'
include { DORADO } from '../modules/local/dorado'
include { DORADO_BASECALLER } from '../modules/local/dorado_basecaller'
include { DORADO_ALIGNER } from '../modules/local/dorado_aligner'
include { MODKIT_PILEUP } from '../modules/local/modkit_pileup'
include { GTF2BED } from '../modules/local/gtf2bed'
include { BAM_RENAME } from '../modules/local/bam_rename'
include { BAMBU } from '../modules/local/bambu'
Expand Down Expand Up @@ -181,8 +183,13 @@ workflow NANOSEQ{
if (!isOffline()) {
GET_TEST_DATA ()
if (params.skip_modification_analysis) {
GET_TEST_DATA.out.ch_input_fast5_dir_path
.set { ch_input_path }
if (params.bedmethyl_out){
GET_TEST_DATA.out.ch_pod5_dir_path
.set { ch_input_path }
} else {
GET_TEST_DATA.out.ch_input_fast5_dir_path
.set { ch_input_path }
}
} else {
GET_TEST_DATA.out.ch_input_dir_path
.set { ch_input_path }
Expand Down Expand Up @@ -216,51 +223,68 @@ workflow NANOSEQ{
.set { ch_sample }

if (!params.skip_basecalling) {
if (params.input_path_file_type == 'fast5'){
if (!params.skip_demultiplexing) {
ch_input_path
.map { it -> [ [id:'undemultiplexed'], it ] }
.set { ch_fast5_dir }
} else {
ch_sample
.set { ch_fast5_dir }
}

if (!params.skip_demultiplexing) {
ch_input_path
.map { it -> [ [id:'undemultiplexed'], it ] }
.set { ch_fast5_dir }
/*
* MODULE: Convert fast5 to pod5
*/
FAST5_TO_POD5 ( ch_fast5_dir )
ch_pod5 = FAST5_TO_POD5.out.pod5
} else {
ch_sample
.set { ch_fast5_dir }
if (!params.skip_demultiplexing) {
ch_input_path
.map { it -> [ [id:'undemultiplexed'], it ] }
.set { ch_pod5 }
} else {
ch_sample
.set { ch_pod5 }
}
}

/*
* MODULE: Convert fast5 to pod5
*/
FAST5_TO_POD5 ( ch_fast5_dir )
ch_pod5 = FAST5_TO_POD5.out.pod5

/*
* MODULE: Basecalling and demultipexing using Dorado
*/
DORADO ( ch_pod5, params.dorado_device, params.dorado_model )
ch_software_versions = ch_software_versions.mix(DORADO.out.versions.ifEmpty(null))

if (!params.skip_demultiplexing) {

/*
* MODULE: Demultipexing using qcat
*/
ch_barcode_kit = Channel.from(params.barcode_kit)

/*
* MODULE: Demultipexing using qcat
*/
QCAT ( DORADO.out.fastq , ch_barcode_kit )
QCAT.out.reads
.map { it -> it[1] }
.flatten()
.map { it -> [ it.baseName.substring(0,it.baseName.lastIndexOf('.')), it ] }
.join(ch_sample.map{ meta, empty -> [meta.barcode, meta] }, by: [0] )
.map { it -> [ it[2], it[1] ] }
.set { ch_fastq } // [ meta, .fastq.qz ]
ch_software_versions = ch_software_versions.mix(QCAT.out.versions.ifEmpty(null))
DORADO_BASECALLER ( ch_pod5, params.dorado_device, params.dorado_model )
ch_software_versions = ch_software_versions.mix(DORADO_BASECALLER.out.versions.ifEmpty(null))
if (!params.bedmethyl_out) {
if (!params.skip_demultiplexing) {

/*
* MODULE: Demultipexing using qcat
*/
ch_barcode_kit = Channel.from(params.barcode_kit)

/*
* MODULE: Demultipexing using qcat
*/
QCAT ( DORADO_BASECALLER.out.dorado_out , ch_barcode_kit )
QCAT.out.reads
.map { it -> it[1] }
.flatten()
.map { it -> [ it.baseName.substring(0,it.baseName.lastIndexOf('.')), it ] }
.join(ch_sample.map{ meta, empty -> [meta.barcode, meta] }, by: [0] )
.map { it -> [ it[2], it[1] ] }
.set { ch_fastq } // [ meta, .fastq.qz ]
ch_software_versions = ch_software_versions.mix(QCAT.out.versions.ifEmpty(null))

} else {
DORADO_BASECALLER.out.dorado_out
.set { ch_fastq }
}
} else {
DORADO.out.fastq
.set { ch_fastq }
ch_fastq = Channel.empty()
ch_fasta = Channel.empty()
DORADO_ALIGNER( DORADO_BASECALLER.out.dorado_out, params.fasta )
MODKIT_PILEUP ( DORADO_ALIGNER.out.aligned_bam )
}

} else {
Expand Down Expand Up @@ -301,6 +325,7 @@ workflow NANOSEQ{
}
}


if (params.run_nanolyse) {
if (!params.nanolyse_fasta) {
if (!isOffline()) {
Expand Down Expand Up @@ -337,20 +362,22 @@ workflow NANOSEQ{
ch_fastqc_multiqc = QCFASTQ_NANOPLOT_FASTQC.out.fastqc_multiqc.ifEmpty([])
}

ch_fasta = Channel.from( [id:'reference'], fasta ).collect()
if (!params.bedmethyl_out){
ch_fasta = Channel.from( [id:'reference'], fasta ).collect()

/*
/*
* SUBWORKFLOW: Make chromosome size file and covert GTF to BED12
*/
CUSTOM_GETCHROMSIZES( ch_fasta )
ch_chr_sizes = CUSTOM_GETCHROMSIZES.out.sizes
ch_fai = CUSTOM_GETCHROMSIZES.out.fai
ch_software_versions = ch_software_versions.mix(CUSTOM_GETCHROMSIZES.out.versions.first().ifEmpty(null))

// will add the following in when nf-core/modules/minimap2/align supports junction bed input
//GTF2BED ( ch_chr_sizes )
//ch_gtf_bed = GTF2BED.out.gtf_bed
//gtf2bed_version = GTF2BED.out.versions
CUSTOM_GETCHROMSIZES( ch_fasta )
ch_chr_sizes = CUSTOM_GETCHROMSIZES.out.sizes
ch_fai = CUSTOM_GETCHROMSIZES.out.fai
ch_software_versions = ch_software_versions.mix(CUSTOM_GETCHROMSIZES.out.versions.first().ifEmpty(null))

// will add the following in when nf-core/modules/minimap2/align supports junction bed input
//GTF2BED ( ch_chr_sizes )
//ch_gtf_bed = GTF2BED.out.gtf_bed
//gtf2bed_version = GTF2BED.out.versions
}

ch_samtools_multiqc = Channel.empty()
if (!params.skip_alignment) {
Expand Down

0 comments on commit 3d4521a

Please sign in to comment.