diff --git a/.editorconfig b/.editorconfig index dd9ffa5..72dda28 100644 --- a/.editorconfig +++ b/.editorconfig @@ -28,10 +28,6 @@ indent_style = unset [/assets/email*] indent_size = unset -# ignore Readme -[README.md] -indent_style = unset - -# ignore python +# ignore python and markdown [*.{py,md}] indent_style = unset diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6f3b92d..a0314bc 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/scna - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/scnanoseq/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/scnanoseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 480e4ea..490d087 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,12 +8,12 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests if: github.repository == 'nf-core/scnanoseq' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -30,7 +30,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 11e0fdb..46091f9 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,13 +5,13 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/scnanoseq' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -27,7 +27,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47e8e1b..3670946 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,10 +29,10 @@ jobs: - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + 
uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 08622fd..2d20d64 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -14,6 +14,8 @@ on: pull_request: types: - opened + - edited + - synchronize branches: - master pull_request_target: @@ -28,11 +30,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 with: @@ -65,8 +70,17 @@ jobs: - name: Inspect download run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - name: Run the downloaded pipeline + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index abfa0e4..edd374a 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index cf13947..1a08eb1 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -15,13 +15,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Set up Python 3.11 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 - cache: "pip" + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit @@ -33,14 +32,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + 
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -61,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index b706875..40acc23 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index d468aea..03ecfcf 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -12,7 +12,7 @@ jobs: - name: get topics and convert to hashtags id: get_topics run: | - curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT - uses: rzr/fediverse-action@master with: @@ -25,13 +25,13 @@ jobs: Please see the changelog: ${{ github.event.release.html_url }} - ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics send-tweet: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: python-version: "3.10" - name: Install dependencies diff --git a/.nf-core.yml b/.nf-core.yml index 465b8c9..9480b62 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,4 +1,5 @@ repository_type: pipeline +nf_core_version: "2.14.1" pipeline_todos: false diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index af57081..4dc0f1d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,6 +3,9 @@ repos: rev: "v3.1.0" hooks: - id: prettier + additional_dependencies: + - prettier@3.2.5 + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python rev: "2.7.3" hooks: diff --git a/CITATIONS.md b/CITATIONS.md index a593d9d..3e3c488 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -50,7 +50,7 @@ - [ToulligQC](https://github.com/GenomiqueENS/toulligQC) - >Karine Dias, Bérengère Laffay, Lionel Ferrato-Berberian, Sophie Lemoine, Ali Hamraoui, Morgane Thomas-Chollier, Stéphane Le Crom and Laurent Jourdren. + > Karine Dias, Bérengère Laffay, Lionel Ferrato-Berberian, Sophie Lemoine, Ali Hamraoui, Morgane Thomas-Chollier, Stéphane Le Crom and Laurent Jourdren. 
- [UMI-tools](https://pubmed.ncbi.nlm.nih.gov/28100584/) diff --git a/README.md b/README.md index 94d82ee..0ec4c7e 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/scnanoseq) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/scnanoseq) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scnanoseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scnanoseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) @@ -29,7 +29,7 @@ On release, automated continuous integration tests run the pipeline on a full-si ![scnanoseq diagram](assets/scnanoseq_diagram.png) -1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC)) +1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC)) 2. Unzip and split FastQ ([`gunzip`](https://linux.die.net/man/1/gunzip)) 1. Optional: Split fastq for faster processing ([`split`](https://linux.die.net/man/1/split)) 3. Trim and filter reads. ([`Nanofilt`](https://github.com/wdecoster/nanofilt)) @@ -41,11 +41,8 @@ On release, automated continuous integration tests run the pipeline on a full-si 7. Barcode correction (custom script `./bin/correct_barcodes.py`) 8. Post-extraction QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC)) 9. Alignment ([`minimap2`](https://github.com/lh3/minimap2)) -10. SAMtools processing including ([`SAMtools`](http://www.htslib.org/doc/samtools.html)): - 1. SAM to BAM - 2. Filtering of mapped only reads - 3. Sorting, indexing and obtain mapping metrics -11. Post-mapping QC in unfiltered BAM files ([`NanoComp`](https://github.com/wdecoster/nanocomp), [`RSeQC`](https://rseqc.sourceforge.net/)) +10. Post-alignment filtering of mapped reads and gathering mapping QC ([`SAMtools`](http://www.htslib.org/doc/samtools.html)) +11. Post-alignment QC in unfiltered BAM files ([`NanoComp`](https://github.com/wdecoster/nanocomp), [`RSeQC`](https://rseqc.sourceforge.net/)) 12. 
Barcode tagging with read quality, BC, BC quality, UMI, and UMI quality (custom script `./bin/tag_barcodes.py`) 13. UMI-based deduplication [`UMI-tools`](https://github.com/CGATOxford/UMI-tools) 14. Gene and transcript level matrices generation. [`IsoQuant`](https://github.com/ablab/IsoQuant) @@ -110,9 +107,9 @@ If you experience any issues, please make sure to submit an issue above. However process { withName: '.*:.*FASTQC.*' - { + { cpus = 20 - } + } } //NOTE: reminder that params set in modules.config need to be copied over to a custom config diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f7e381f..690a26e 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -57,8 +57,7 @@ top_modules: custom_content: read_counts_section: parent_id: read_counts_section - order: - read_counts_module + order: read_counts_module seurat_section: parent_id: seurat_section @@ -74,7 +73,7 @@ custom_data: section_name: "Read Counts" file_format: "csv" plot_type: "table" - + gene_seurat_stats_module: parent_id: seurat_section parent_name: "Seurat Section" diff --git a/assets/nf-core-scnanoseq_logo_light.png b/assets/nf-core-scnanoseq_logo_light.png index 4233867..f400846 100644 Binary files a/assets/nf-core-scnanoseq_logo_light.png and b/assets/nf-core-scnanoseq_logo_light.png differ diff --git a/bin/config.py b/bin/config.py index a5b2892..790eeea 100644 --- a/bin/config.py +++ b/bin/config.py @@ -15,7 +15,7 @@ ADPT_MAC_MATCH_ED=2 #minimum proportion of match required when searching ## format suffix -SEQ_SUFFIX_WIN=200 +SEQ_SUFFIX_WIN=200 SEQ_SUFFIX_MIN_MATCH_PROP=1 SEQ_SUFFIX_AFT_ADPT=(20,50) @@ -61,13 +61,12 @@ def high_sensitivity_threshold_calculation(count_array, exp_cells): DEFAULT_BC_STAT_FN = "summary.txt" DEFAULT_EMPTY_DROP_MIN_ED = 5 # minimum edit distance from emtpy drop BC to selected BC DEFAULT_EMPTY_DROP_NUM = 2000 # number of BC in the output - #################################################### ##### DEFAULT in Demultiplexing ###### #################################################### -DEFAULT_ASSIGNMENT_ED = 2 +DEFAULT_ASSIGNMENT_ED = 2 # Make sure this is smaller than DEFAULT_GRB_FLANKING_SIZE assert DEFAULT_GRB_FLANKING_SIZE >= DEFAULT_ASSIGNMENT_ED DEFAULT_ED_FLANKING = DEFAULT_ASSIGNMENT_ED diff --git a/bin/find_reads.py b/bin/find_reads.py index 45a52c2..9a4bd1a 100644 --- a/bin/find_reads.py +++ b/bin/find_reads.py @@ -1,69 +1,70 @@ -# find specific reads with given read id and output to a new fastq file - - -import argparse -import Bio.SeqIO -import multiprocessing as mp -import textwrap -from pathlib import Path - - -import helper - -def parse_arg(): - parser = argparse.ArgumentParser( - description=textwrap.dedent( - ''' - Find specific reads with given read id and output to a new fastq file - ''')) - - # Required positional argument - parser.add_argument('input_fastq_dir', type=str, - help='Fastq directory, Note that this should be a folder.') - - # required name argment - requiredNamed = parser.add_argument_group('These Arguments are required') - requiredNamed.add_argument( - '--output_file', type=str, required = True, - help='Filename for the output fastq.') - requiredNamed.add_argument('--id_file', type=str, required = True, - help='A file containing all the read ids to look for.') - - parser.add_argument('--threads', type=int, - help='Number of threads. 
Default: # of CPU - 1') - - - args = parser.parse_args() - - return args - -def find_reads(in_fastq, id_list): - fastq = Bio.SeqIO.parse(in_fastq, "fastq") - read_list = [r for r in fastq if r.id in id_list] - return read_list - # check id -def main(args): - - # get ids (from args.id_file) - ids = [] - with open (args.id_file, 'r') as f: - for line in f: - ids.append(line.strip()) - - - fastq_fns = list(Path(args.input_fastq_dir).rglob('*.fastq')) - rst_futures = helper.multiprocessing_submit(find_reads, - fastq_fns, - n_process=args.threads, - id_list = ids) - - rst_ls = [] - for f in rst_futures: - rst_ls+=f.result() - - print(len(rst_ls)) - Bio.SeqIO.write(rst_ls, args.output_file, 'fastq') - -if __name__ == '__main__': - args = parse_arg() - main(args) \ No newline at end of file +# find specific reads with given read id and output to a new fastq file + + +import argparse +import Bio.SeqIO +import multiprocessing as mp +import textwrap +from pathlib import Path + + +import helper + +def parse_arg(): + parser = argparse.ArgumentParser( + description=textwrap.dedent( + ''' + Find specific reads with given read id and output to a new fastq file + ''')) + + # Required positional argument + parser.add_argument('input_fastq_dir', type=str, + help='Fastq directory, Note that this should be a folder.') + + # required name argment + requiredNamed = parser.add_argument_group('These Arguments are required') + requiredNamed.add_argument( + '--output_file', type=str, required = True, + help='Filename for the output fastq.') + requiredNamed.add_argument('--id_file', type=str, required = True, + help='A file containing all the read ids to look for.') + + parser.add_argument('--threads', type=int, + help='Number of threads. Default: # of CPU - 1') + + + args = parser.parse_args() + + return args + +def find_reads(in_fastq, id_list): + fastq = Bio.SeqIO.parse(in_fastq, "fastq") + read_list = [r for r in fastq if r.id in id_list] + return read_list + # check id +def main(args): + + # get ids (from args.id_file) + ids = [] + with open (args.id_file, 'r') as f: + for line in f: + ids.append(line.strip()) + + + fastq_fns = list(Path(args.input_fastq_dir).rglob('*.fastq')) + rst_futures = helper.multiprocessing_submit(find_reads, + fastq_fns, + n_process=args.threads, + id_list = ids) + + rst_ls = [] + for f in rst_futures: + rst_ls+=f.result() + + print(len(rst_ls)) + Bio.SeqIO.write(rst_ls, args.output_file, 'fastq') + +if __name__ == '__main__': + args = parse_arg() + main(args) + diff --git a/bin/generate_read_counts.sh b/bin/generate_read_counts.sh index aea42cc..4c42399 100755 --- a/bin/generate_read_counts.sh +++ b/bin/generate_read_counts.sh @@ -39,7 +39,7 @@ do data="$(basename $sample_name)" # RAW FASTQ COUNTS - + if [[ -s "$raw_fastqc" ]] then fastqc_counts=$(get_fastqc_counts "$raw_fastqc") @@ -47,9 +47,9 @@ do else data="$data," fi - + # TRIM COUNTS - + if [[ -s "$trim_fastqc" ]] then trim_counts=$(get_fastqc_counts "$trim_fastqc") @@ -57,9 +57,9 @@ do else data="$data," fi - + # PREEXTRACT COUNTS - + if [ -s "$extract_fastqc" ] then extract_counts=$(get_fastqc_counts "$extract_fastqc") @@ -67,10 +67,9 @@ do else data="$data," fi - + # CORRECT COUNTS - - + if [ -s $correct_csv ] then correct_counts=$(cut -f6 $correct_csv | awk '{if ($0 != "") {print $0}}' | wc -l) diff --git a/bin/helper.py b/bin/helper.py index fd068a1..3cdfa59 100644 --- a/bin/helper.py +++ b/bin/helper.py @@ -19,7 +19,7 @@ def reverse_complement(seq): Returns: reverse_complement seq ''' - comp = {'A': 'T', 'C': 'G', 'G': 
'C', 'T': 'A', + comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'a': 't', 'c': 'g', 'g': 'c', 't': 'a'} letters = \ [comp[base] if base in comp.keys() else base for base in seq] @@ -28,8 +28,8 @@ def reverse_complement(seq): def err_msg(msg): CRED = '\033[91m' CEND = '\033[0m' - print(CRED + msg + CEND) - + print(CRED + msg + CEND) + def warning_msg(msg, printit = True): CRED = '\033[93m' CEND = '\033[0m' @@ -47,11 +47,11 @@ def green_msg(msg, printit = True): return CRED + msg + CEND def sliding_window_sum(array, window) : - cum = np.cumsum(array) + cum = np.cumsum(array) return cum[window:] - cum[:-window] def sliding_window_mean(array, window) : - cum = np.cumsum(array) + cum = np.cumsum(array) return (cum[window:] - cum[:-window]) / window @@ -62,22 +62,22 @@ def __init__(self, **kwargs): def add(self, attr_name, attr_val, overwrite = True): if attr_name in __dict__.keys() and not overwrite: pass - else: + else: self.__dict__[attr_name] = attr_val def rm(self, attr_name): if attr_name in self.__dict__.keys(): del self.__dict__[attr_name] def __str__(self): return str(self.__dict__) - + def check(self, attr_list, add_none = True, silent = True): """ - Check whether the attributes in a given list are present. + Check whether the attributes in a given list are present. Parameters ---------- attr_list : LIST - list of strings of the attributes to check + list of strings of the attributes to check add_none : BOOL whether or not to create the missed attributes with value None silent : BOOL @@ -91,19 +91,19 @@ def check(self, attr_list, add_none = True, silent = True): assert isinstance(silent, bool) except (AttributeError, TypeError): raise AssertionError("add_none and silent must be bool variable.") - + check_res = True for attr in attr_list: if attr not in self.__dict__.keys(): check_res = False self.__dict__[attr] = None return check_res if not silent else True - + def multiprocessing_submit(func, iterator, n_process=mp.cpu_count()-1 , - pbar=True, pbar_unit='Read',pbar_func=len, + pbar=True, pbar_unit='Read',pbar_func=len, schduler = 'process', *arg, **kwargs): - """multiple processing or threading, + """multiple processing or threading, Args: func: function to be run parallely @@ -138,7 +138,7 @@ def result(self): if pbar: _pbar = tqdm(unit=pbar_unit, desc='Processed') - + # run in single process/thread if n_process < 1 if n_process <= 1: for it in iterator: @@ -151,7 +151,7 @@ def result(self): max_queue = n_process futures = {} n_job_in_queue = 0 - + # make sure the result is yield in the order of submit. 
job_idx = 0 job_completed = {} @@ -171,12 +171,12 @@ def result(self): # batch size as value, release the cpu as soon as one job finished job = next(as_completed(futures), None) - # yield the completed job in the order of submit + # yield the completed job in the order of submit if job is not None: job_completed[futures[job][1]] = job, futures[job][0] del futures[job] - # + # if job is None and i is None and len(job_completed)==0: break @@ -187,25 +187,25 @@ def result(self): _pbar.update(job_completed[job_to_yield][1]) yield job_completed[job_to_yield][0] del job_completed[job_to_yield] - + # submit new job i = next(iterator, None) if i is not None: futures[executor.submit(func, i, *arg, **kwargs)] = (pbar_func(i),job_idx) job_idx += 1 - + job_to_yield += 1 -# multiproces panda data frame +# multiproces panda data frame def procee_batch(df, row_func, *arg, **kwargs): return df.apply(row_func, axis=1, *arg, **kwargs) def df_multiproceccing_apply(df, func, n_process, aggr_func=pd.concat, pbar = True, pbar_unit='Read',pbar_func=len,*arg, **kwargs): - """This is a re-implementation which is very similar to dask apply. + """This is a re-implementation which is very similar to dask apply. """ def sub_df_generator(df, n_part): - """ - Generator: split a large df by row into multiple + """ + Generator: split a large df by row into multiple """ sub_size = int(np.ceil(len(df) / n_part)) batch_start, batch_end = 0, sub_size @@ -215,25 +215,25 @@ def sub_df_generator(df, n_part): batch_end += sub_size #run 100 batch per process on average but ensure at least 1000 reads per batch - num_batch = min(int(len(df)/1000)+1, n_process*100) + num_batch = min(int(len(df)/1000)+1, n_process*100) df_iter = sub_df_generator(df, n_part=num_batch) - + rst_futures = multiprocessing_submit(procee_batch, df_iter, n_process=n_process , - pbar = pbar, pbar_unit=pbar_unit,pbar_func=pbar_func, + pbar = pbar, pbar_unit=pbar_unit,pbar_func=pbar_func, schduler = 'process', row_func = func, *arg, **kwargs) for i in rst_futures: yield i.result() # rsts = [x.result() for x in rst_futures] # return aggr_func(rsts) - + # concatenate multiple files def concatenate_files(output_file, *input_files): with open(output_file, 'wb') as outfile: for input_file in input_files: with open(input_file, 'rb') as infile: shutil.copyfileobj(infile, outfile) - os.remove(input_file) + os.remove(input_file) # get file with a certian extensions def get_files(search_dir, extensions, recursive=True): @@ -257,7 +257,7 @@ def check_exist(file_list): if exit_code == 1: sys.exit() -# split any iterator in to batches +# split any iterator in to batches def batch_iterator(iterator, batch_size): """generateor of batches of items in a iterator with batch_size. """ @@ -266,7 +266,7 @@ def batch_iterator(iterator, batch_size): for entry in iterator: i += 1 batch.append(entry) - + if i == batch_size: yield batch batch = [] @@ -285,7 +285,7 @@ def fastq_parser(file_handle): next(file_handle) # skip '+' q_letter = next(file_handle) yield read_tuple(id[1:].split()[0], seq.strip(), q_letter.strip()) - + # validate filename (check suffix): @@ -300,6 +300,6 @@ def check_suffix(filename, suffix_lst): return filename.endswith(suffix_lst) if any([filename.endswith(x) for x in suffix_lst]): return True - else: + else: return False diff --git a/bin/main.py b/bin/main.py index cab5d5b..51b1611 100755 --- a/bin/main.py +++ b/bin/main.py @@ -13,7 +13,7 @@ 3. BC rank plot 4. 
csv file of the BC whitelist """ - + import sys import shlex import os @@ -48,23 +48,23 @@ logger.setLevel(logging.DEBUG) def parse_arg(argv): - + def print_help(): help_message=\ f''' Description: BLAZE2 is a tool for demultiplexing 10X single cell long-read RNA-seq data. - It takes fastq files as input and output a whitelist of barcodes and a fastq + It takes fastq files as input and output a whitelist of barcodes and a fastq with demultiplexed reads. Usage: blaze --expect-cells [OPTIONS] Required argument: One of the following two options is required unless whitelisting step is turned off: - --expect-cells + --expect-cells Expected number of cells. --count-threshold - Count threshold of high-quality putative barcodes used to determine the whitelist. + Count threshold of high-quality putative barcodes used to determine the whitelist. Note that the --count-threshold option is ignored if --expect-cells is specified. Options: @@ -75,7 +75,7 @@ def print_help(): Filename of output files. Default: --output-prefix {DEFAULT_PREFIX} Note that the output can be directed to a different directory by specifying the path in the prefix. E.g., --output-prefix /path/to/output/prefix - + --output-fastq Filename of output fastq file name. Default: --output-fastq {DEFAULT_GRB_OUT_FASTQ} Note that if the filename has to end with .fastq, .fq, .fastq.gz or .fq.gz. @@ -85,35 +85,35 @@ def print_help(): --no-whitelisting: Skip the whitelisting step and assign reads to full whitelist specified by --full-bc-whitelist. - + --max-edit-distance - Maximum edit distance allowed between a putative barcode and a barcode + Maximum edit distance allowed between a putative barcode and a barcode for a read/putative barcdoe to be assigned to the barcode. Default: --max-edit-distance {DEFAULT_ASSIGNMENT_ED} - + --kit-version <3v2 or 3v3 or 5v2>: Choose from 10X Single Cell 3ʹ gene expression v2 or v3 or 5' gene expression. If using other - protocols, please do not specify this option and specify --full-bc-whitelist instead. By default, + protocols, please do not specify this option and specify --full-bc-whitelist instead. By default, `--kit_version v3` will be used if --full-bc-whitelist is not specified. --minQ : - Putative BC contains one or more bases with Q - txt file containing all the possible BCs. You may provide your own whitelist. - No need to specify this if users want to use the 10X whilelist. The correct + txt file containing all the possible BCs. You may provide your own whitelist. + No need to specify this if users want to use the 10X whilelist. The correct version of 10X whilelist will be determined based on 10X kit version. --overwrite - Overwrite the old file when the output file(s) exist. If not specified, + Overwrite the old file when the output file(s) exist. If not specified, the steps generating the existing file(s) will be skipped. --threads : Number of threads used --batch-size - : Number of reads this program process together as a batch. Not that if - the specified number larger than the number of reads in each fastq files, the + : Number of reads this program process together as a batch. Not that if + the specified number larger than the number of reads in each fastq files, the batch size will be forced to be the number of reads in the file. --minimal_stdout @@ -123,7 +123,7 @@ def print_help(): --high-sensitivity-mode: Turn on the sensitivity mode, which increases the sensitivity of barcode detections but potentially increase the number false/uninformative BC in - the whitelist. + the whitelist. 
Note that --emptydrop is recommanded specified with this mode (See details below). Empty droplet BCs @@ -135,7 +135,7 @@ def print_help(): ''' print(textwrap.dedent(help_message)) - + if not argv: argv = sys.argv else: @@ -144,11 +144,11 @@ def print_help(): elif isinstance(argv, list): argv = ['blaze.py'] + argv else: - helper.err_msg("Error: Invalid argument input, the argument should be a string or a list.") + helper.err_msg("Error: Invalid argument input, the argument should be a string or a list.") sys.exit(1) - - - # Default + + + # Default out_fastq_fn = DEFAULT_GRB_OUT_FASTQ prefix = DEFAULT_PREFIX overwrite = False @@ -158,7 +158,7 @@ def print_help(): min_phred_score = DEFAULT_GRB_MIN_SCORE kit = DEFAULT_GRB_KIT batch_size=1000 - high_sensitivity_mode = False + high_sensitivity_mode = False emptydrop_max_count = np.inf do_demultiplexing = True do_whitelisting = True @@ -167,7 +167,7 @@ def print_help(): count_t = None # Read from options - try: + try: opts, args = getopt.getopt(argv[1:],"h", ["help","threads=","minQ=","full-bc-whitelist=","high-sensitivity-mode", "output-prefix=", "expect-cells=", "overwrite", @@ -175,10 +175,10 @@ def print_help(): "no-demultiplexing", "max-edit-distance=", "minimal_stdout", "no-whitelisting", "count-threshold="]) except getopt.GetoptError: - helper.err_msg("Error: Invalid argument input") + helper.err_msg("Error: Invalid argument input") print_help() - sys.exit(1) - + sys.exit(1) + for opt, arg in opts: if opt in ("-h", "--help"): print_help() @@ -194,11 +194,11 @@ def print_help(): elif opt == '--threads': n_process = int(arg) elif opt == '--minQ': - min_phred_score = int(arg) + min_phred_score = int(arg) elif opt == '--full-bc-whitelist': - full_bc_whitelist = arg + full_bc_whitelist = arg elif opt == "--out-putative-bc": - out_raw_bc_fn = arg + out_raw_bc_fn = arg elif opt == "--kit-version": kit = arg.lower() elif opt == "--high-sensitivity-mode": @@ -226,7 +226,7 @@ def print_help(): summary_fn = prefix + DEFAULT_BC_STAT_FN if kit not in ['3v2', '3v3', '5v2']: - helper.err_msg("Error: Invalid value of --kit-version (" + kit + "), please choose from 3v3 or 3v2 or 5v2") + helper.err_msg("Error: Invalid value of --kit-version (" + kit + "), please choose from 3v3 or 3v2 or 5v2") sys.exit() if full_bc_whitelist: @@ -241,13 +241,13 @@ def print_help(): # Read from args if not args: - helper.err_msg("Error: Missing fastq directory.") + helper.err_msg("Error: Missing fastq directory.") print_help()# print help doc when no command line args provided sys.exit(0) # check required argument if not exp_cells and count_t is None and do_whitelisting: - helper.err_msg("--expect-cells or --count-threshold is required to build the whitelist!") + helper.err_msg("--expect-cells or --count-threshold is required to build the whitelist!") sys.exit(1) if exp_cells and count_t is not None: helper.warning_msg("--expect-cells and --count-threshold are both specified. --expect-cells will be used.") @@ -256,7 +256,7 @@ def print_help(): # check input fastq_dir = args[0] helper.check_exist([full_bc_whitelist, fastq_dir]) - + if os.path.isdir(fastq_dir): fastq_fns = helper.get_files(fastq_dir, ['*.fastq', '*.fq', '*.fastq.gz', '*.fq.gz']) elif os.path.isfile(fastq_dir): @@ -300,7 +300,7 @@ def print_help(): f"Error in filename configuration:" f"Filename '{out_plot_fn}' should end with '.png'. 
Please check the config.py file in BLAZE.") sys.exit(1) - + # helper.warning_msg(f"Filename {out_fastq_fn} exists, will output demultiplexed reads into {out_fastq_fn}.") return fastq_fns, out_fastq_fn, n_process, exp_cells ,min_phred_score, \ @@ -327,7 +327,7 @@ def get_raw_bc_from_reads(reads, min_q=0, kit=None): ------- 1. Counter of high-confidence putative BC 2. Counter of high-confidence putative BC - 3. pd.DataFrame containing all putative BCs + 3. pd.DataFrame containing all putative BCs """ # init read_ids = [] @@ -341,11 +341,11 @@ def get_raw_bc_from_reads(reads, min_q=0, kit=None): post_umi_flankings = [] for i,r in enumerate(reads): - - # create read object - read = polyT_adaptor_finder.Read(read_id = r.id, sequence=str(r.seq), - phred_score=r.q_letter, kit=kit) - + + # create read object + read = polyT_adaptor_finder.Read(read_id = r.id, sequence=str(r.seq), + phred_score=r.q_letter, kit=kit) + read.get_strand_and_raw_bc() read_ids.append(read.id) @@ -355,11 +355,11 @@ def get_raw_bc_from_reads(reads, min_q=0, kit=None): trim_idxs.append(read.polyT_trimming_idx) pre_bc_flankings.append(read.pre_bc_flanking) post_umi_flankings.append(read.post_umi_flanking) - - if read.raw_bc_min_q and read.raw_bc_min_q >= min_q: + + if read.raw_bc_min_q and read.raw_bc_min_q >= min_q: raw_bc.append(read.raw_bc) - - if read.raw_bc_min_q and read.raw_bc_min_q < min_q: + + if read.raw_bc_min_q and read.raw_bc_min_q < min_q: raw_bc_pass.append(100) #tag for low quality putative bc else: raw_bc_pass.append(read.adaptor_polyT_pass) @@ -379,7 +379,7 @@ def get_raw_bc_from_reads(reads, min_q=0, kit=None): def qc_report(pass_count, min_phred_score, stdout=True, out_fn=None): ''' Generate report for the putative barcode detection. - Print stats for + Print stats for a. # of input read in total (only look at primary alignment if it's BAM) b. # and % read pass the polyT and adaptor searching process. c. 
# and % with poly T and adaptor find in both ends @@ -392,20 +392,20 @@ def qc_report(pass_count, min_phred_score, stdout=True, out_fn=None): min score used to filter out putative BC ''' total_read = sum(pass_count.values()) - + print_message=textwrap.dedent( f''' \n----------------------stats of the putative barcodes-------------------------- - Total number of reads: + Total number of reads: {total_read:,} - Reads with unambiguous polyT and adapter positions found: + Reads with unambiguous polyT and adapter positions found: {pass_count[0]+ pass_count[100]:,} ({(pass_count[0]+ pass_count[100])/total_read*100:.2f}% of all reads) {pass_count[0]:,} in which all bases in the putative BC have Q>={min_phred_score} - Failed Reads: - no polyT and adapter positions found: + Failed Reads: + no polyT and adapter positions found: {pass_count[1]:,} ({pass_count[1]/total_read*100:.2f}% of all reads) - polyT and adapter positions found in both end (fail to determine strand): + polyT and adapter positions found in both end (fail to determine strand): {pass_count[2]:,} ({pass_count[2]/total_read*100:.2f}% of all reads) multiple polyT and adapter found in one end {pass_count[10]:,} ({pass_count[10]/total_read*100:.2f}% of all reads) @@ -417,22 +417,22 @@ def qc_report(pass_count, min_phred_score, stdout=True, out_fn=None): with open(out_fn, 'w') as f: f.write(print_message + '\n') - -def get_bc_whitelist(raw_bc_count, full_bc_whitelist, exp_cells=None, - count_t=None, high_sensitivity_mode=False, - output_empty = False, empty_max_count = np.inf, + +def get_bc_whitelist(raw_bc_count, full_bc_whitelist, exp_cells=None, + count_t=None, high_sensitivity_mode=False, + output_empty = False, empty_max_count = np.inf, out_plot_fn = DEFAULT_KNEE_PLOT_FN): - f""" - Get a whitelist from all putative cell bc with high-confidence putative bc counts. - If the expect number is provided (default), a quantile-based threshold will be - calculated to determine the exact cells to be output. Otherwise, a user-specified + """ + Get a whitelist from all putative cell bc with high-confidence putative bc counts. + If the expect number is provided (default), a quantile-based threshold will be + calculated to determine the exact cells to be output. Otherwise, a user-specified ount threshold will be used and the cells/Barocdes with counts above the threshold will be output. - + If high_sensitivity_mode = True, the high sensitivity (HS) mode is turned on which uses - more relaxed threshold + more relaxed threshold - If in output_empty=True, a list of BCs that are most likely corresponding to - empty droplets will also be produced autimatically , which might be useful in + If in output_empty=True, a list of BCs that are most likely corresponding to + empty droplets will also be produced autimatically , which might be useful in downstream analysis. Criteria of selecting these BC: 1. 
BC in 10x full whitelist, and @@ -449,10 +449,10 @@ def get_bc_whitelist(raw_bc_count, full_bc_whitelist, exp_cells=None, ValueError: No valid exp_cells or count_t specified Returns: - dict 1: + dict 1: key: barcodes selected values: high-confidence putative BC counts - + list (high_sensitivity_mode only): barcodes most likely associated to empty droplet """ @@ -462,15 +462,15 @@ def get_bc_whitelist(raw_bc_count, full_bc_whitelist, exp_cells=None, percentile_count_thres = high_sensitivity_threshold_calculation else: percentile_count_thres = default_count_threshold_calculation - - whole_whitelist = [] + + whole_whitelist = [] if full_bc_whitelist.endswith('.zip'): with zipfile.ZipFile(full_bc_whitelist) as zf: # check if there is only 1 file assert len(zf.namelist()) == 1 - with io.TextIOWrapper(zf.open(zf.namelist()[0]), + with io.TextIOWrapper(zf.open(zf.namelist()[0]), encoding="utf-8") as f: for line in f: whole_whitelist.append(line.strip()) @@ -478,11 +478,11 @@ def get_bc_whitelist(raw_bc_count, full_bc_whitelist, exp_cells=None, with open(full_bc_whitelist, 'r') as f: for line in f: whole_whitelist.append(line.strip()) - + whole_whitelist = set(whole_whitelist) raw_bc_count = {k:v for k,v in raw_bc_count.items() if k in whole_whitelist} - + # determine real bc based on the count threshold if count_t is not None: knee_plot(list(raw_bc_count.values()), count_t, out_plot_fn) @@ -504,7 +504,7 @@ def get_bc_whitelist(raw_bc_count, full_bc_whitelist, exp_cells=None, # we don't need too much BC in this list if len(ept_bc) > DEFAULT_EMPTY_DROP_NUM: - break + break return cells_bc, ept_bc elif exp_cells: @@ -512,7 +512,7 @@ def get_bc_whitelist(raw_bc_count, full_bc_whitelist, exp_cells=None, knee_plot(list(raw_bc_count.values()), t, out_plot_fn) cells_bc = {k:v for k,v in raw_bc_count.items() if v > t} - + if not output_empty: return cells_bc, [] else: @@ -532,7 +532,7 @@ def get_bc_whitelist(raw_bc_count, full_bc_whitelist, exp_cells=None, break return cells_bc, ept_bc - else: + else: raise ValueError('Invalid value of count_t and/or exp_cells.') def knee_plot(counts, threshold=None, out_fn = 'knee_plot.png'): @@ -578,18 +578,18 @@ def read_batch_generator(fastq_fns, batch_size): def main(argv=None): - + fastq_fns, out_fastq_fn, n_process, exp_cells ,min_phred_score, \ full_bc_whitelist, out_raw_bc_fn, out_whitelist_fn, \ high_sensitivity_mode, batch_size, out_emptydrop_fn, \ emptydrop_max_count, overwrite, out_plot_fn, do_demultiplexing, \ max_edit_distance, summary_fn,minimal_out, do_whitelisting, \ count_t, kit = parse_arg(argv) - + # Start running: Welcome logo if not minimal_out: print(textwrap.dedent( - f'''\n\nWelcome to + f'''\n\nWelcome to {BLAZE_LOGO} ''')) @@ -603,9 +603,9 @@ def main(argv=None): printit = False)) logger.info(f'Getting putative barcodes from {len(fastq_fns)} FASTQ files...') read_batchs = read_batch_generator(fastq_fns, batch_size=batch_size) - + rst_futures = helper.multiprocessing_submit(get_raw_bc_from_reads, - read_batchs, n_process=n_process, + read_batchs, n_process=n_process, min_q=min_phred_score, kit=kit) raw_bc_pass_count = defaultdict(int) @@ -618,35 +618,35 @@ def main(argv=None): rst_df.to_csv(out_raw_bc_fn, index=False) else: rst_df.to_csv(out_raw_bc_fn, mode='a', index=False, header=False) - + helper.green_msg(f'Putative barcode table saved in {out_raw_bc_fn}') - + # ----------------------stats of the putative barcodes-------------------------- - qc_report(raw_bc_pass_count, min_phred_score=min_phred_score, + qc_report(raw_bc_pass_count, 
min_phred_score=min_phred_score, out_fn=summary_fn, stdout= not minimal_out) - + else: logger.info(helper.warning_msg(textwrap.dedent( f""" - Warning: `{out_raw_bc_fn}` exists. BLAZE would NOT re-generate the file and the existing file + Warning: `{out_raw_bc_fn}` exists. BLAZE would NOT re-generate the file and the existing file will be directly used for downstream steps. If you believe it needs to be updated, please - change the --output_prefix or remove/rename the existing file. - + change the --output_prefix or remove/rename the existing file. + Note: there is no need to update this file if the input data remain the same and the previous - run that generated this file finished successfully. It wouldn't change with other specified + run that generated this file finished successfully. It wouldn't change with other specified arguments . However if you are running using a modified config.py file or the existing `{out_raw_bc_fn}` was generated by a different version of BLAZE, updating the file is suggested. """ ), printit = False)) - + ###################### ###### Whitelisting ###################### if do_whitelisting and (not os.path.exists(out_whitelist_fn) or overwrite): dfs = pd.read_csv(out_raw_bc_fn, chunksize=1_000_000) - + # get bc count dict (filtered by minQ) raw_bc_count = Counter() for df in tqdm(dfs, desc = 'Counting high-quality putative BC'): @@ -670,7 +670,7 @@ def main(argv=None): try: bc_whitelist, ept_bc = get_bc_whitelist(raw_bc_count, - full_bc_whitelist, + full_bc_whitelist, exp_cells=exp_cells, count_t=count_t, high_sensitivity_mode=high_sensitivity_mode, @@ -684,7 +684,7 @@ def main(argv=None): with open(out_emptydrop_fn, 'w') as f: for k in ept_bc: f.write(k+'\n') - helper.green_msg(f'Empty droplet barcode list saved as `{out_emptydrop_fn}`.') + helper.green_msg(f'Empty droplet barcode list saved as `{out_emptydrop_fn}`.') except Exception as e: logger.exception(e) @@ -708,7 +708,7 @@ def main(argv=None): f"NOTE: BLAZE will use existing `{out_whitelist_fn}` for the downstread steps and will not re-generate the {out_plot_fn}." f"If the file is required, please remove/rename the existing `{out_whitelist_fn}` and rerun." , printit = False)) - + elif not do_whitelisting: logger.info(helper.warning_msg( f"NOTE: Whitelisting step is skipped as specified by --no-whitelisting. BLAZE will assign reads to {full_bc_whitelist}." 
@@ -725,13 +725,13 @@ def main(argv=None): printit = False )) logger.info("Assigning reads to whitelist.\n") - read_assignment.assign_read(fastq_fns, - out_fastq_fn, - out_raw_bc_fn, + read_assignment.assign_read(fastq_fns, + out_fastq_fn, + out_raw_bc_fn, out_whitelist_fn, max_edit_distance, n_process, - out_fastq_fn.endswith('.gz'), + out_fastq_fn.endswith('.gz'), batch_size) else: logger.info(helper.warning_msg( diff --git a/bin/polyT_adaptor_finder.py b/bin/polyT_adaptor_finder.py index 026876d..c331b19 100644 --- a/bin/polyT_adaptor_finder.py +++ b/bin/polyT_adaptor_finder.py @@ -33,17 +33,17 @@ def __init__(self, read_id, sequence, phred_score=None, strand = None, kit=None, self.kit = kit for key in kwargs.keys(): self.__dict__[key] = kwargs[key] - + def add(self, attr_name, attr_val, overwrite = True): if attr_name in __dict__.keys() and not overwrite: pass - else: + else: self.__dict__[attr_name] = attr_val - + def find_adapter_5_prime(self, read=None, strand=None, adapter_seq=ADPT_SEQ, num_nt=ADPT_WIN,max_ed=ADPT_MAC_MATCH_ED, tso_seq=TSO_SEQ): - + strand = self._strand if not strand else strand read = self.seq if not read else read @@ -61,7 +61,7 @@ def find_adapter_5_prime(self, read=None, strand=None, adapter_seq=ADPT_SEQ, return {'-':[max(0,tso_seq_start-d2)+sub_seq_end+1]} if sub_seq_end > 0 else {} else: return {} - + if strand == '+': read = helper.reverse_complement(read[-num_nt:]) adpt_ends = self.find_adapter_5_prime( @@ -70,22 +70,22 @@ def find_adapter_5_prime(self, read=None, strand=None, adapter_seq=ADPT_SEQ, return {'+':adpt_ends} if len(adpt_ends) else {} else: - + fwd_strand = self.find_adapter_5_prime( - strand='-', adapter_seq=adapter_seq, + strand='-', adapter_seq=adapter_seq, num_nt=num_nt, tso_seq=tso_seq) rev_strand = self.find_adapter_5_prime( - strand='+', adapter_seq=adapter_seq, + strand='+', adapter_seq=adapter_seq, num_nt=num_nt, tso_seq=tso_seq) rst = {**{k:v for k,v in fwd_strand.items() if len(v)}, **{k:v for k,v in rev_strand.items() if len(v)}} return rst - def find_adaptor_3_prime(self, read=None, strand=None, adaptor_seq=ADPT_SEQ, + def find_adaptor_3_prime(self, read=None, strand=None, adaptor_seq=ADPT_SEQ, num_nt=ADPT_WIN, max_ed=ADPT_MAC_MATCH_ED): - - def find_poly_T(seq, poly_T_len=PLY_T_LEN, + + def find_poly_T(seq, poly_T_len=PLY_T_LEN, min_match_prop=SEQ_SUFFIX_MIN_MATCH_PROP): '''Find poly T in seq Parameters @@ -110,7 +110,7 @@ def find_poly_T(seq, poly_T_len=PLY_T_LEN, # convert to np_array T -> 1 and ACG -> 0 read_code = np.array([int(x == 'T') for x in seq]) T_prop = helper.sliding_window_mean(read_code, poly_T_len) - return np.where(T_prop >= min_match_prop)[0] + return np.where(T_prop >= min_match_prop)[0] strand = self._strand if not strand else strand read = self.seq if not read else read @@ -130,7 +130,7 @@ def find_poly_T(seq, poly_T_len=PLY_T_LEN, if idx-d2 > win_end: searching_win.append((win_start, win_end)) win_start, win_end = idx-d2, idx-d1 - else: + else: win_end = idx-d1 searching_win.append((win_start, win_end)) @@ -141,34 +141,34 @@ def find_poly_T(seq, poly_T_len=PLY_T_LEN, if sub_seq_end > 0: adpt_ends.append(max(start,0)+sub_seq_end+1) - return {'-':list(set(adpt_ends))} if len(adpt_ends) else {} + return {'-':list(set(adpt_ends))} if len(adpt_ends) else {} + - # take reverse complement if read is coming from transcript strand (with ployA instead ployT) if strand == '+': read = helper.reverse_complement(read[-num_nt:]) adpt_ends = self.find_adaptor_3_prime( - read, strand='-', adaptor_seq=adaptor_seq, + read, 
strand='-', adaptor_seq=adaptor_seq, num_nt=num_nt).get('-',[]) return {'+':adpt_ends} if len(adpt_ends) else {} - + else: T_strand = self.find_adaptor_3_prime( - strand='-', adaptor_seq=adaptor_seq, + strand='-', adaptor_seq=adaptor_seq, num_nt=num_nt) A_strand = self.find_adaptor_3_prime( - strand='+', adaptor_seq=adaptor_seq, + strand='+', adaptor_seq=adaptor_seq, num_nt=num_nt) rst = {**{k:v for k,v in T_strand.items() if len(v)}, **{k:v for k,v in A_strand.items() if len(v)}} return rst - - def find_adaptor(self,read=None, strand=None, adaptor_seq=ADPT_SEQ, + + def find_adaptor(self,read=None, strand=None, adaptor_seq=ADPT_SEQ, num_nt=ADPT_WIN, max_ed=ADPT_MAC_MATCH_ED, tso_seq=TSO_SEQ): ''' find adaptor from a read - + Parameters ---------- read : STR @@ -178,7 +178,7 @@ def find_adaptor(self,read=None, strand=None, adaptor_seq=ADPT_SEQ, Transcript strand, this function will find adaptor in '-' poly T strand Will do a reverse complement if strand == '-' adaptor_seq : STR - 10X adaptor sequence, the full-length adaptor sequence is + 10X adaptor sequence, the full-length adaptor sequence is 'CTACACGACGCTCTTCCGATCT' (22nt). Last 12nt is used by default. max_ed : float max edit distance in the adaptor alignment. Default: 2 @@ -210,7 +210,7 @@ def get_strand_and_raw_bc(self): 1: poly T and adaptor not found in any strand 2: poly T and adaptor found in both strand 10: multiple adaptor found in 20~50 nt of poly T upstream - + Returns ------- None. @@ -227,7 +227,7 @@ def get_strand_and_raw_bc(self): if num_of_strand_find == 0: self.adaptor_polyT_pass += 1 elif num_of_strand_find == 1: - # check single location + # check single location adptors = set(list(adapt_dict.values())[0]) num_of_adptor_find = len(adptors) if num_of_adptor_find == 0: @@ -259,12 +259,12 @@ def get_strand_and_raw_bc(self): self.raw_bc_start = list(adapt_dict.values())[0][0] self.strand = list(adapt_dict.keys())[0] - + if self.strand == '+': self.raw_bc = \ helper.reverse_complement( self.seq)[self.raw_bc_start: self.raw_bc_start+16] - else: + else: self.raw_bc = self.seq[self.raw_bc_start: self.raw_bc_start+16] if self.phred_score is not None: @@ -273,15 +273,15 @@ def get_strand_and_raw_bc(self): else: phred_score = self.phred_score - bc_phred_score = phred_score[self.raw_bc_start: self.raw_bc_start+16] + bc_phred_score = phred_score[self.raw_bc_start: self.raw_bc_start+16] self.raw_bc_min_q = min([ord(x) for x in bc_phred_score]) - 33 else: self.raw_bc_min_q = None - + # get poly_T_start_raw and adaptor_end_raw - + @property def adator_trimming_idx(self): """sequencing after trimming the adaptor and UMI @@ -294,7 +294,7 @@ def adator_trimming_idx(self): return None elif self._strand == '+': return int(-self.raw_bc_start-16-DEFAULT_UMI_SIZE) - else: + else: return int(self.raw_bc_start+16+DEFAULT_UMI_SIZE) @property @@ -306,10 +306,10 @@ def putative_UMI(self): elif self._strand == '+': return helper.reverse_complement( self.seq)[self.raw_bc_start+16: self.raw_bc_start+16+DEFAULT_UMI_SIZE] - else: + else: return self.seq[ self.raw_bc_start+16: self.raw_bc_start+16+DEFAULT_UMI_SIZE] - + @property def pre_bc_flanking(self): if not self._get_strand_and_raw_bc_flag: @@ -319,9 +319,9 @@ def pre_bc_flanking(self): elif self._strand == '+': return helper.reverse_complement( self.seq)[self.raw_bc_start-DEFAULT_GRB_FLANKING_SIZE: self.raw_bc_start] - else: + else: return self.seq[self.raw_bc_start-DEFAULT_GRB_FLANKING_SIZE: self.raw_bc_start] - + @property def post_umi_flanking(self): """sequencing after trimming the adaptor 
and UMI @@ -336,30 +336,30 @@ def post_umi_flanking(self): return helper.reverse_complement( self.seq)[self.raw_bc_start+16+DEFAULT_UMI_SIZE: \ self.raw_bc_start+16+DEFAULT_UMI_SIZE+ DEFAULT_GRB_FLANKING_SIZE] - else: + else: return self.seq[ self.raw_bc_start+16+DEFAULT_UMI_SIZE: \ self.raw_bc_start+16+DEFAULT_UMI_SIZE+DEFAULT_GRB_FLANKING_SIZE] - + @property def polyT_trimming_idx(self): """sequencing after trimming the adaptor and UMI Returns: index of the end of the polyT, negative if it's polyA strand """ - - + + umi_end_idx = self.adator_trimming_idx - + if umi_end_idx is None: return None - + # take reverse complement if read is coming from transcript strand (with ployA instead ployT) if umi_end_idx < 0: reversed = True seq = helper.reverse_complement(self.seq) umi_end_idx = abs(umi_end_idx) - else: + else: reversed = False seq = self.seq @@ -367,15 +367,15 @@ def polyT_trimming_idx(self): polyT_start = seq.find('TTTT', umi_end_idx) if polyT_start == -1: return umi_end_idx - + read_code = np.array([int(x == 'T') for x in seq]) for idx, nt in enumerate(read_code[polyT_start:]): if nt == 1: polyT_start += 1 - elif sum(read_code[polyT_start:polyT_start+10]) >= 7: # hard code definition of polyT. plotT end when >3 of 10 consecutive letter are not T + elif sum(read_code[polyT_start:polyT_start+10]) >= 7: # hard code definition of polyT. plotT end when >3 of 10 consecutive letter are not T polyT_start += 1 else: - break + break return int(polyT_start) if not reversed else int(-polyT_start) @@ -385,12 +385,12 @@ def strand(self): # set new strand helper.warning_msg( """ - No strand information is recorded, try 'get_strand_and_raw_bc' + No strand information is recorded, try 'get_strand_and_raw_bc' first. """) return None return self._strand - + @strand.setter def strand(self, v): if v == None: @@ -423,5 +423,4 @@ def main(): if __name__ == '__main__': main() - - + diff --git a/bin/polyT_trimmer.py b/bin/polyT_trimmer.py index 06c5785..8f60aa2 100644 --- a/bin/polyT_trimmer.py +++ b/bin/polyT_trimmer.py @@ -51,15 +51,15 @@ def polyT_trimming_idx(seq, reverse=False, umi_end_idx=0, polyT_end_minT=7, poly polyT_start = seq.find('TTTT', umi_end_idx) if polyT_start == -1: return umi_end_idx - + read_code = np.array([int(x == 'T') for x in seq]) for idx, nt in enumerate(read_code[polyT_start:]): if nt == 1: polyT_start += 1 - elif sum(read_code[polyT_start:polyT_start+polyT_end_check_win]) >= polyT_end_minT: + elif sum(read_code[polyT_start:polyT_start+polyT_end_check_win]) >= polyT_end_minT: polyT_start += 1 else: - break + break return int(polyT_start) if not reverse else int(-polyT_start) @@ -91,7 +91,7 @@ def _read_batch_generator(fastq_fns, batch_size): batch_iter = helper.batch_iterator(fastq, batch_size=batch_size) for batch in batch_iter: yield batch - + else: with open(fn) as handle: fastq =\ @@ -114,13 +114,13 @@ def _proc_read_batches(read_batch, gz): else: seq = r.seq[int(polyT_end):] qscore = r.qscore[int(polyT_end):] - + out_buffer += '@' + r.id + '\n' out_buffer += str(seq) + '\n' out_buffer += '+\n' out_buffer += qscore + '\n' - + if gz: b_out_buffer = gzip.compress(out_buffer.encode('utf-8')) else: @@ -152,13 +152,13 @@ def polyT_trimmer(fastq_fns, fastq_out, n_process, gz, batchsize): # multi-thread version else: - rst_futures = helper.multiprocessing_submit(_proc_read_batches, - r_batches, + rst_futures = helper.multiprocessing_submit(_proc_read_batches, + r_batches, n_process=n_process, schduler = "process", pbar_func=len, gz = gz) - + # collect results with open(fastq_out, 
'wb') as output_handle: for f in rst_futures: @@ -169,5 +169,5 @@ def polyT_trimmer(fastq_fns, fastq_out, n_process, gz, batchsize): if __name__ == '__main__': fastq_fns, fastq_out, n_process, gzip_out = parse_command_line() polyT_trimmer(fastq_fns, fastq_out, n_process, gz=gzip_out, batchsize=4000) - + diff --git a/bin/read_assignment.py b/bin/read_assignment.py index 2b3d25d..6bd7bf5 100644 --- a/bin/read_assignment.py +++ b/bin/read_assignment.py @@ -43,7 +43,7 @@ def _match_bc_row(row, whitelist, max_ed): '': if no barcode was found in the whitelist 'ambiguous': if multiple barcode was found in the whitelist 2. adjusted putative_umi - 3. strand '+' for read with positive strand (start with BC,umi,polyT...), + 3. strand '+' for read with positive strand (start with BC,umi,polyT...), '-' for read with negative strand """ @@ -51,21 +51,21 @@ def _match_bc_row(row, whitelist, max_ed): strand = '' elif row.polyT_end > 0: strand = '+' - else: + else: strand = '-' if not row.putative_bc or row.putative_bc in whitelist: return [row.putative_bc, row.putative_umi, strand] else: bc = row.putative_bc - + best_ed = max_ed bc_hit = '' bc_hit_end_idx = -1 # extending the putative barcode from both sides for potential indels bc = row.pre_bc_flanking[-DEFAULT_ED_FLANKING:] + bc + row.putative_umi[:DEFAULT_ED_FLANKING] for i in whitelist: - ed, end_idx = sub_edit_distance(i, bc, best_ed) + ed, end_idx = sub_edit_distance(i, bc, best_ed) if ed < best_ed: best_ed = ed bc_hit = i @@ -74,12 +74,12 @@ def _match_bc_row(row, whitelist, max_ed): if not bc_hit: bc_hit = i bc_hit_end_idx = end_idx - else: + else: bc_hit = 'ambiguous' best_ed -= 1 if best_ed < 0: return ['', row.putative_umi, strand] - + if bc_hit == 'ambiguous' or bc_hit == '': return ['', row.putative_umi, strand] else: @@ -93,7 +93,7 @@ def _match_bc_row(row, whitelist, max_ed): elif umi_adj < 0: out_umi = row.putative_bc[umi_adj:] + row.putative_umi[:umi_adj] return [bc_hit, out_umi, strand] - + def batch_barcode_to_fastq(r_batches_with_idx, assignment_df ,gz = True): """Take a read batch, write fastq/fastq.gz to a tmp file """ @@ -105,9 +105,9 @@ def batch_barcode_to_fastq(r_batches_with_idx, assignment_df ,gz = True): read_idx = 0 for r in read_batch: row = assignment_df.iloc[start_df_idx+read_idx]#the row in putative bc table - read_idx += 1 + read_idx += 1 try: - assert row.read_id == r.id + assert row.read_id == r.id except AssertionError: helper.err_msg("Different order in putative bc file and input fastq!") sys.exit() @@ -121,12 +121,12 @@ def batch_barcode_to_fastq(r_batches_with_idx, assignment_df ,gz = True): else: seq = r.seq[int(row.polyT_end):] qscore = r.qscore[int(row.polyT_end):] - + out_buffer += f"@{row.BC_corrected}_{row.putative_umi}#{row.read_id}_{row.strand}\n" out_buffer += str(seq) + '\n' out_buffer += '+\n' out_buffer += qscore + '\n' - + output_handle.write(out_buffer) output_handle.close() return temp_file.name @@ -150,7 +150,7 @@ def _read_and_bc_batch_generator_with_idx(fastq_fns, putative_bc_csv, batch_size (read_fastq(title, sequence, qscore) for title, sequence, qscore in helper.fastq_parser(handle)) batch_iter = helper.batch_iterator(fastq, batch_size=batch_size) - + for batch in batch_iter: batch_len = len(batch) batch_bc_df = pd.read_csv( @@ -176,16 +176,16 @@ def _read_and_bc_batch_generator_with_idx(fastq_fns, putative_bc_csv, batch_size read_idx += batch_len putative_bc_f.close() - + def _assign_read_batches(r_batch, whitelist, max_ed, gz): """Single-thread function: Assign all putative barcode to 
whiteliested barcode Args: - r_batch (tuple): yield from read_and_bc_batch_generator_with_idx + r_batch (tuple): yield from read_and_bc_batch_generator_with_idx (read_batch, start_idx, batch_bc_df) - whitelist (list): list of barcode - n_process (int): number of process + whitelist (list): list of barcode + n_process (int): number of process max_ed (int): maximum edit distance allowed for a assignment Returns: @@ -199,7 +199,7 @@ def _assign_read_batches(r_batch, whitelist, max_ed, gz): df = df.fillna('') whitelist = set(whitelist) out_buffer = '' - + new_cols = [] for row in df.itertuples(): new_cols.append(_match_bc_row(row, whitelist, max_ed)) @@ -211,7 +211,7 @@ def _assign_read_batches(r_batch, whitelist, max_ed, gz): for r, bc in zip(read_batch, df.itertuples()): try: - assert bc.read_id == r.id + assert bc.read_id == r.id except AssertionError: helper.err_msg("Different order in putative bc file and input fastq!") sys.exit() @@ -224,13 +224,13 @@ def _assign_read_batches(r_batch, whitelist, max_ed, gz): else: seq = r.seq[int(bc.polyT_end):] qscore = r.qscore[int(bc.polyT_end):] - + out_buffer += f"@{bc.BC_corrected}_{bc.putative_umi}#{bc.read_id}_{bc.strand}\n" out_buffer += str(seq) + '\n' out_buffer += '+\n' out_buffer += qscore + '\n' - + if gz: b_out_buffer = gzip.compress(out_buffer.encode('utf-8')) else: @@ -240,20 +240,20 @@ def _assign_read_batches(r_batch, whitelist, max_ed, gz): # logger.info(helper.green_msg(f"Demultiplexing finshied: ", printit = False)) # logger.info(helper.green_msg(f"Successfully demultiplexed reads / Total reads: {sum(df.BC_corrected!='')} / {len(df.BC_corrected)}. ", printit = False)) -def assign_read(fastq_fns, fastq_out, putative_bc_csv, +def assign_read(fastq_fns, fastq_out, putative_bc_csv, whitelsit_csv, max_ed, n_process, gz, batchsize): """Main function: Demultiplex fastq files using putative barcode csv and whitelist csv """ # greating generator for read and putative barcode batches r_batches = \ _read_and_bc_batch_generator_with_idx(fastq_fns, putative_bc_csv, batchsize) - + # read the whitelist - whitelist = [] + whitelist = [] with open(whitelsit_csv, 'r') as f: for line in f: whitelist.append(line.split('-')[0].strip()) - + # assign putative barcode to whitelist # single thread version if n_process == 1: @@ -272,15 +272,15 @@ def assign_read(fastq_fns, fastq_out, putative_bc_csv, # multi-thread version else: - rst_futures = helper.multiprocessing_submit(_assign_read_batches, - r_batches, + rst_futures = helper.multiprocessing_submit(_assign_read_batches, + r_batches, n_process=n_process, schduler = "process", pbar_func=lambda x: len(x[0]), whitelist = whitelist, max_ed = max_ed, gz = gz) - + # collect results demul_count_tot = 0 count_tot = 0 diff --git a/bin/update_whitelist.py b/bin/update_whitelist.py index 830a5c2..1e8b8b2 100644 --- a/bin/update_whitelist.py +++ b/bin/update_whitelist.py @@ -18,11 +18,11 @@ def parse_arg(): description=textwrap.dedent( ''' This script can be used to generate a new whitelist from the putative bc table - output from 'blaze.py'. Users may specify different argment used in + output from 'blaze.py'. Users may specify different argment used in 'blaze.py' to obtain a different whitelist. 
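# _match_bc_row above corrects a putative barcode by finding the single closest whitelist
# barcode within max_ed edits, returning '' when nothing is close enough or when two
# whitelist entries tie at the best distance ('ambiguous'). The real code uses a windowed
# edit distance over a flanking-extended sequence so indels can shift the barcode, and it
# also adjusts the UMI for the resulting offset; the sketch below uses plain Levenshtein
# distance purely to illustrate the accept / ambiguous / reject decision:
def levenshtein(a, b):
    """Classic dynamic-programming edit distance."""
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (ca != cb)))
        prev = cur
    return prev[-1]

def match_barcode(putative_bc, whitelist, max_ed=2):
    """Return the corrected barcode, or '' if no hit within max_ed or the best hit is ambiguous."""
    if putative_bc in whitelist:
        return putative_bc
    best_ed, best_hit, ambiguous = max_ed + 1, '', False
    for wl_bc in whitelist:
        ed = levenshtein(wl_bc, putative_bc)
        if ed < best_ed:
            best_ed, best_hit, ambiguous = ed, wl_bc, False
        elif ed == best_ed:
            ambiguous = True                   # a second candidate at the same (best) distance
    return best_hit if best_hit and not ambiguous else ''

# match_barcode('ACGTACGTACGTACGA', {'ACGTACGTACGTACGT', 'TTTTACGTACGTACGT'}) -> 'ACGTACGTACGTACGT'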
'''), formatter_class=argparse.ArgumentDefaultsHelpFormatter) - + # Required positional argument parser.add_argument('putative_bc_csv', type=str, help='Filename of the putative_bc csv file output from blaze.py') @@ -40,13 +40,13 @@ def parse_arg(): parser.add_argument('--kit-version', type=str, default='3v3', help= textwrap.dedent( ''' - Choose from v2 and v3 (for 10X Single Cell 3ʹ gene expression v2 or v3). + Choose from v2 and v3 (for 10X Single Cell 3ʹ gene expression v2 or v3). ''')) parser.add_argument('--minQ', type=int, default=15, help= textwrap.dedent( ''' - : Minimum phred score for all bases in a putative BC. - Reads whose putative BC contains one or more bases with + : Minimum phred score for all bases in a putative BC. + Reads whose putative BC contains one or more bases with Q: .txt file containing all the possible BCs. Users may provide @@ -64,7 +64,7 @@ def parse_arg(): detections but potentially increase the number false/uninformative BC in the whitelist.''') parser.add_argument('--emptydrop', action='store_true', - help='''Output list of BCs corresponding to empty droplets (filename: {DEFAULT_EMPTY_DROP_FN}), + help='''Output list of BCs corresponding to empty droplets (filename: {DEFAULT_EMPTY_DROP_FN}), which could be used to estimate ambiant RNA expressionprofile.''') parser.add_argument('--emptydrop-max-count', type=float, default=np.inf, help=textwrap.dedent( @@ -76,20 +76,20 @@ def parse_arg(): args = parser.parse_args() if not args.expect_cells and not args.count_threshold: - helper.err_msg("Missing argument --expect-cells or --count-threshold.") + helper.err_msg("Missing argument --expect-cells or --count-threshold.") sys.exit(1) if (args.expect_cells or args.high_sensitivity_mode) and args.count_threshold: helper.warning_msg(textwrap.dedent( f''' Warning: You have specified'--count-threshold'. Options "--high_sensitivity_mode" and "--expect-cells" would be ignored if - specified. + specified. 
''')) args.high_sensitivity_mode = False - + args.kit_version = args.kit_version.lower() if args.kit_version not in ['3v2', '3v3', '5v2']: - helper.err_msg("Error: Invalid value of --kit-version, please choose from 3v3 or 3v2 or 5v2") + helper.err_msg("Error: Invalid value of --kit-version, please choose from 3v3 or 3v2 or 5v2") sys.exit() if args.full_bc_whitelist: @@ -102,7 +102,7 @@ def parse_arg(): elif args.kit_version == '3v2' or args.kit_version == '5v2': args.full_bc_whitelist = DEFAULT_GRB_WHITELIST_V2 - # check file + # check file helper.check_exist([args.full_bc_whitelist, args.putative_bc_csv]) return args @@ -111,9 +111,9 @@ def parse_arg(): def main(args): # read table dfs = pd.read_csv(args.putative_bc_csv, chunksize=args.chunk_size) - + # get bc count dict (filtered by minQ) - + raw_bc_count = Counter() for df in tqdm(dfs, desc = 'Counting high-quality putative BC'): raw_bc_count += Counter(df[ @@ -121,10 +121,10 @@ def main(args): if args.high_sensitivity_mode: print('Preparing whitelist...(high-sensitivity-mode)') - else: + else: print('Preparing whitelist...') bc_whitelist, ept_bc = get_bc_whitelist(raw_bc_count, - args.full_bc_whitelist, + args.full_bc_whitelist, args.expect_cells, args.count_threshold, args.high_sensitivity_mode, diff --git a/conf/base.config b/conf/base.config index 904e6a3..ba3d12f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -57,7 +57,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/modules.config b/conf/modules.config index f2e8319..d0dca0d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -25,7 +25,6 @@ process { pattern: '*_versions.yml' ] } - } ///////////// @@ -193,7 +192,7 @@ if (!params.skip_qc){ ] } } - + process { withName:'.*:BAM_SORT_STATS_SAMTOOLS_DEDUP:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.dedup.sorted" } @@ -230,6 +229,22 @@ if (!params.skip_qc) { } +// READ COUNTS +if (!params.skip_qc) { + + process { + withName:'.*:READ_COUNTS' { + publishDir = [ + path: { "${params.outdir}/batch_qcs/read_counts" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + } + +} + ///////////////////// // REFERENCE FILES // ///////////////////// @@ -307,7 +322,7 @@ process { // GUNZIP process { - withName: '.*:GUNZIP' { + withName: '.*:GUNZIP.*' { publishDir = [ enabled: false ] @@ -507,6 +522,15 @@ process { } } +process { + withName:'.*:BAM_SORT_STATS_SAMTOOLS_TAGGED:.*' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + enabled: false + ] + } +} + process { withName:'.*:BAM_SORT_STATS_SAMTOOLS_DEDUP:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.dedup.sorted" } diff --git a/conf/test_full.config b/conf/test_full.config index fab1e8d..b714c41 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -24,4 +24,6 @@ params { // Barcode options barcode_format = "10X_3v3" + split_amount = 500000 + } diff --git a/docs/images/nf-core-scnanoseq_logo_dark.png b/docs/images/nf-core-scnanoseq_logo_dark.png index d186ab3..fa6be46 100644 Binary files a/docs/images/nf-core-scnanoseq_logo_dark.png and b/docs/images/nf-core-scnanoseq_logo_dark.png differ diff --git a/docs/images/nf-core-scnanoseq_logo_light.png b/docs/images/nf-core-scnanoseq_logo_light.png index 3a65f7f..3b26fcd 100644 Binary files a/docs/images/nf-core-scnanoseq_logo_light.png and b/docs/images/nf-core-scnanoseq_logo_light.png differ diff --git a/docs/usage.md b/docs/usage.md index 007bc76..0146bf0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -161,6 +161,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. 
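Stepping back to the update_whitelist.py hunks above: the script streams the putative-barcode table in pandas chunks, keeps only barcodes whose every base meets the --minQ Phred cutoff, and tallies the survivors with a Counter before the whitelist threshold is applied. A condensed sketch of that counting step, assuming illustrative column names (putative_bc, putative_bc_min_q) and chunk size rather than the script's exact ones:

from collections import Counter

import pandas as pd

def count_high_quality_barcodes(putative_bc_csv, min_q=15, chunk_size=1_000_000):
    """Stream the putative barcode table and count barcodes whose worst base quality >= min_q."""
    counts = Counter()
    for chunk in pd.read_csv(putative_bc_csv, chunksize=chunk_size):
        keep = chunk['putative_bc'].notna() & (chunk['putative_bc_min_q'] >= min_q)
        counts += Counter(chunk.loc[keep, 'putative_bc'])
    return counts

# counts.most_common(10) then shows the strongest candidate cell barcodes for the whitelist.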
diff --git a/modules.json b/modules.json index 9cf09a5..6f0b053 100644 --- a/modules.json +++ b/modules.json @@ -8,137 +8,99 @@ "bamtools/split": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/cat": { "branch": "master", "git_sha": "9437e6053dccf4aafa022bfd6e7e9de67e625af8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", "git_sha": "0997b47c93c06b49aa7b3fefda87e728312cf2ca", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "nanoplot": { "branch": "master", "git_sha": "a31407dfaf0cb0d04768d5cb439fc6f4523a6981", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/nanoplot/nanoplot.diff" }, "rseqc/readdistribution": { "branch": "master", "git_sha": "6c7d8f1d6247655e4bc4d97f37b68b2461f645f6", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "aeb02a39d4c463598bfdcb2d964dbb7acbcf1298", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": [ - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools"] }, "samtools/idxstats": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": [ - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools"] }, "samtools/index": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": [ - "bam_sort_stats_samtools" - ] + "installed_by": ["bam_sort_stats_samtools"] }, "samtools/merge": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "4352dbdb09ec40db71e9b172b97a01dcf5622c26", - "installed_by": [ - "bam_sort_stats_samtools" - ] + "installed_by": ["bam_sort_stats_samtools"] }, "samtools/stats": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": [ - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools"] }, "samtools/view": { "branch": "master", "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "toulligqc": { "branch": "master", "git_sha": "061a322293b3487e53f044304710e54cbf657717", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/toulligqc/toulligqc.diff" }, "umitools/dedup": { "branch": "master", "git_sha": "3bd4f34e3093c2a16e6a8eefc22242b9b94641db", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -147,40 +109,30 @@ "bam_sort_stats_samtools": { "branch": "master", "git_sha": 
"4352dbdb09ec40db71e9b172b97a01dcf5622c26", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "bam_stats_samtools": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": [ - "bam_sort_stats_samtools" - ] + "installed_by": ["bam_sort_stats_samtools"] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/modules/local/read_counts.nf b/modules/local/read_counts.nf index bcb9d51..b3fdd36 100644 --- a/modules/local/read_counts.nf +++ b/modules/local/read_counts.nf @@ -1,17 +1,11 @@ process READ_COUNTS { label 'process_low' - conda "conda-forge::perl=5.26.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/perl:5.26.2' : 'biocontainers/perl:5.26.2' }" - //conda "conda-forge::sed=4.7" - //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - // 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - // 'ubuntu:20.04' }" - input: path raw_fastqc path trim_fastqc diff --git a/nextflow.config b/nextflow.config index 2b3c5ca..104b73f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,15 +62,16 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options config_profile_name = null @@ -106,103 +107,109 @@ try { } // Load nf-core/scnanoseq custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! 
-// try { -// includeConfig "${params.custom_config_base}/pipeline/scnanoseq.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/scnanoseq profiles: ${params.custom_config_base}/pipeline/scnanoseq.config") -// } +try { + includeConfig "${params.custom_config_base}/pipeline/scnanoseq.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/scnanoseq profiles: ${params.custom_config_base}/pipeline/scnanoseq.config") +} profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - channels = ['conda-forge', 'bioconda', 'defaults'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - 
charliecloud.enabled = true
-        conda.enabled         = false
-        docker.enabled        = false
-        singularity.enabled   = false
-        podman.enabled        = false
-        shifter.enabled       = false
-        apptainer.enabled     = false
+        charliecloud.enabled    = true
+        conda.enabled           = false
+        docker.enabled          = false
+        singularity.enabled     = false
+        podman.enabled          = false
+        shifter.enabled         = false
+        apptainer.enabled       = false
     }
     apptainer {
-        apptainer.enabled     = true
-        apptainer.autoMounts  = true
-        conda.enabled         = false
-        docker.enabled        = false
-        singularity.enabled   = false
-        podman.enabled        = false
-        shifter.enabled       = false
-        charliecloud.enabled  = false
+        apptainer.enabled       = true
+        apptainer.autoMounts    = true
+        conda.enabled           = false
+        docker.enabled          = false
+        singularity.enabled     = false
+        podman.enabled          = false
+        shifter.enabled         = false
+        charliecloud.enabled    = false
+    }
+    wave {
+        apptainer.ociAutoPull   = true
+        singularity.ociAutoPull = true
+        wave.enabled            = true
+        wave.freeze             = true
+        wave.strategy           = 'conda,container'
     }
     gitpod {
-        executor.name      = 'local'
-        executor.cpus      = 4
-        executor.memory    = 8.GB
+        executor.name           = 'local'
+        executor.cpus           = 4
+        executor.memory         = 8.GB
     }
     test      { includeConfig 'conf/test.config'      }
     test_full { includeConfig 'conf/test_full.config' }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 9418c53..f9489a5 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -453,6 +453,13 @@
             "description": "Validation of parameters in lenient more.",
             "hidden": true,
             "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+        },
+        "pipelines_testdata_base_path": {
+            "type": "string",
+            "fa_icon": "far fa-check-circle",
+            "description": "Base URL or local path to location of pipeline test dataset files",
+            "default": "https://raw.githubusercontent.com/nf-core/test-datasets/",
+            "hidden": true
         }
     }
 }
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 5611062..0000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,15 +0,0 @@
-# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff.
-# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
-[tool.ruff]
-line-length = 120
-target-version = "py38"
-cache-dir = "~/.cache/ruff"
-
-[tool.ruff.lint]
-select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"]
-
-[tool.ruff.lint.isort]
-known-first-party = ["nf_core"]
-
-[tool.ruff.lint.per-file-ignores]
-"__init__.py" = ["E402", "F401"]
diff --git a/subworkflows/local/utils_nfcore_scnanoseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_scnanoseq_pipeline/main.nf
index a1ecce9..b7625a3 100644
--- a/subworkflows/local/utils_nfcore_scnanoseq_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_scnanoseq_pipeline/main.nf
@@ -137,6 +137,10 @@ workflow PIPELINE_COMPLETION {
            imNotification(summary_params, hook_url)
        }
    }
+
+    workflow.onError {
+        log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting"
+    }
 }

 /*
@@ -227,8 +231,16 @@ def methodsDescriptionText(mqc_methods_yaml) {
     meta["manifest_map"] = workflow.manifest.toMap()

     // Pipeline DOI
-    meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
-    meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"
+    if (meta.manifest_map.doi) {
+        // Using a loop to handle multiple DOIs
+        // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+        // Removing ` ` since the manifest.doi is a string and not a proper list
+        def temp_doi_ref = ""
+        String[] manifest_doi = meta.manifest_map.doi.tokenize(",")
+        for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), "
+        meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2)
+    } else meta["doi_text"] = ""
+    meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"

     // Tool references
     meta["tool_citations"] = ""