diff --git a/BioExcel_SeqQC.png b/BioExcel_SeqQC.png new file mode 100644 index 0000000..f07b60e Binary files /dev/null and b/BioExcel_SeqQC.png differ diff --git a/README.md b/README.md index 4ba8fb9..587d334 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ $ bioexcel_seqqc -h An example of basic usage of the pipeline is: ```bash -$ bioexcel_seqqc --files in1.fa in2.fa --threads 4 --outdir ./output +$ bxcl_seqqc --files in1.fa in2.fa --threads 4 --outdir ./output ``` ### Editing configuration for checkFastQC stage @@ -74,7 +74,7 @@ changed. First, output an example configuration file (which contains the default values): ```bash -$ bioexcel_seqqc --printconfig +$ bxcl_seqqc --printconfig ``` The file lists the summary outputs from FastQC, and what decisions to make @@ -102,4 +102,24 @@ fqc.wait() trim_process = rt.trimQC(infiles, trimdir, threads): trim_process.wait() +``` + +## Stages + +Our pipeline consists of three main stages: runfastqc, checkfastqc and runtrim. +Each stage exists as a python module as shown above. Each module contains +specific functions that execute the tools listed. The diagram below shows +each of these stages, with colour coding to show which tools are used in each +module, as well as useful output files. For this work, the module checkfastqc +was developed specifically to remove the human intervention required to check +output from fastqc before continuing with trimming/further analysis. + +![alt text](./BioExcel_SeqQC.png "BioExcel_SeqQC workflow") + +Each module can also be executed independently of the main executable workflow. 
+For example, if a situation occurs that causes cutadapt to fail, the runtrim +stage can be executed from the command line as + +```bash +$ python -m bioexcel_seqqc.runtrim ``` \ No newline at end of file diff --git a/bioexcel_seqqc/checkfastqc.py b/bioexcel_seqqc/checkfastqc.py index 3e61e7e..79a6f54 100755 --- a/bioexcel_seqqc/checkfastqc.py +++ b/bioexcel_seqqc/checkfastqc.py @@ -72,7 +72,6 @@ def get_qc(fqcdir, passthrough, qcconf): #qclist.append(splitline[0]) #Store Pass/Warn/Fail in list # Check for dependance on 1st or 2nd pass through - print(qcpass, qtrim, atrim, recheck) return qcpass, qtrim, atrim, recheck @@ -114,8 +113,9 @@ def check_qc(infiles, fqcdir, trimdir, tmpdir, adaptseq, qcconf, threads, if recheck: ### Will need work if logic changes to need retrim after pass 2 passthrough = 'pass2' - - pfqc = rfqc.run_fqc([f1, f2], fqcdir+'/'+passthrough, tmpdir, threads) + + pfqc = rfqc.run_fqc([f1, f2], fqcdir+'/'+passthrough, tmpdir, + threads) pfqc.wait() qcpass, qtrim, atrim, recheck = get_qc(fqcdir+'/'+passthrough, passthrough, qcconf)