-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
290 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,290 @@ | ||
# The following configuration file holds very important variables that will help operate I L I A D. | ||
# There are many provided download links that I L I A D will automatically download when you, the user, invoke it. | ||
# All of these come with no warranties. Needless to say, links tend to break over time! | ||
# We will do our best to keep them up-to-date. | ||
# Feel free to replace any links with your own preferences of files. | ||
# Again, this comes with no warranties. | ||
|
||
# __Author__ = Noah Herrick | ||
# __Email__ = [email protected] | ||
# __Software__ = Iliad: Suite of Snakemake Genomic Data Processing Workflows | ||
# __License__ = MIT License | ||
# __copyright__ = Copyright 2023, Noah Herrick | ||
# __Year__ = 2023 | ||
# __Version__ = 1.0.0 | ||
|
||
##################################### | ||
##################################### | ||
##################################### | ||
|
||
# # # USER INPUT VARIABLES # # # | ||
|
||
##################################### | ||
##################################### | ||
##################################### | ||
|
||
# You must insert your /PATH/TO/Iliad/ | ||
# use 'pwd' command to find your current working directory when you are inside of Iliad directory | ||
# e.g. /user/name/projects/Iliad/ <---- must include forward slash at the end of working directory path | ||
|
||
# must include forward slash, '/', at the end of working directory path | ||
workdirPath: NEED PATH HERE | ||
|
||
|
||
|
||
############################################################################################## | ||
### --- Default tables and samples for Raw Sequence (FASTQ) and Stored Sequence (CRAM) --- ### | ||
# ------------------------------------------------------------------------------------------ # | ||
|
||
# for downloading FastQ raw seq data make sure there is an Excel table or csv document with two columns and no header: Sample_Name,FTP_url | ||
# e.g. KPGP-00127,ftp://ftp.kobic.re.kr/pub/KPGP/2020_release_candidate/WGS_SR/KPGP-00127/KPGP-00127_L1_R1.fq.gz | ||
samplesDict: config/UserSampleTable.csv | ||
# must include a list of samples in one column with a "sample" header, regardless of whether you use the download feature or redirect ILIAD to a FASTQ data path | ||
samples: config/samples.tsv | ||
|
||
# Same setup as above, for when you are retrieving CRAM files from an FTP server | ||
cramSamplesDict: config/cramSampleTable.csv | ||
cramSamples: config/cramSamples.tsv | ||
|
||
########################################################### | ||
### --- Default GENOME REFERENCE ASSEMBLY retrieval --- ### | ||
# ------------------------------------------------------- # | ||
|
||
# If you want to automatically download reference genome assembly, configure below AutoRetrieveReference as 'true' - otherwise leave blank! | ||
AutoRetrieveReference: true # default is true | ||
# If you already have specific reference genome assembly, configure below IhaveReference as 'true', | ||
# place into your ./Iliad/resources/ directory, | ||
# and configure filename below | ||
# - otherwise leave blank! | ||
IhaveReference: # default is blank | ||
|
||
# If you have your own reference file to use, state the filePath | ||
# - DO NOT REMOVE "resources/". | ||
# It MUST be in your "./Iliad/resources/" directory like so: ./Iliad/resources/FILENAME | ||
reference: | ||
filePath: resources/GRCh38_full_analysis_set_plus_decoy_hla.fa # This is a popular example that you might already have filed away | ||
|
||
############################################################ | ||
### --- Default VARIANT CALLING options via BCFtools --- ### | ||
# -------------------------------------------------------- # | ||
|
||
# BCFtools manual LINK: https://samtools.github.io/bcftools/bcftools.html | ||
# BCFtools cheat sheet LINK: https://gist.github.com/elowy01/93922762e131d7abd3c7e8e166a74a0b | ||
|
||
VariantCalling: | ||
# # See BCFtools manual for adding additional options, e.g. for base alignment quality "-B". Just add options within bounds of quotations | ||
mpileup: | ||
options: "-d 8000 -B" # default is → -d 8000 -B | ||
call: | ||
options: "-m -A" # default is → -m -A | ||
|
||
# Normalize and Left-align - configure below Normalize as 'true' - otherwise leave blank! | ||
Normalize: true | ||
|
||
# # See BCFtools manual for adding additional options. Just add options within bounds of quotations. | ||
# Current options in effect when "Normalize: true" are "norm -f {reference}" | ||
# you can add other flags using the 'options: "[add more options here]"' below | ||
Norm: | ||
options: "" # default is blank | ||
|
||
# DO NOT Normalize and Left-align - configure below doNotNormalize as 'true' - otherwise leave blank! | ||
doNotNormalize: # default is blank - benchmarked as true | ||
|
||
################################################ | ||
### --- Lift and Merge Submodule Options --- ### | ||
# -------------------------------------------- # | ||
|
||
# place the appropriate BASE of each filename under the file header "baseFileName_VCF" | ||
# i.e. if FILENAME.vcf, then the BASE is "FILENAME". | ||
# These can be either compressed (.vcf.gz and .vcf.gz.[tbi/csi]) or uncompressed (.vcf). | ||
# a compressed file will need the associated index file in the directory, too. | ||
vcfs: config/mergeTheseVCFs.txt | ||
|
||
LiftoverTF: true # default is true | ||
|
||
# update your genomic positions to Homo sapiens GRCh38 reference assembly - configure below Version38 as 'true' - otherwise mark 'false'! | ||
Version38: true # default is true | ||
# update your genomic positions to Homo sapiens GRCh37 reference assembly - configure above Version38 as 'false' | ||
|
||
dbsnpLiftMerge: | ||
|
||
desiredVersion: GRCh38 | ||
projectName: Demo | ||
|
||
#----------- 37 ------------- | ||
dbsnp37VcfDownload: https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/VCF/All_20180423.vcf.gz | ||
dbsnp37TbiDownload: https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/VCF/All_20180423.vcf.gz.tbi | ||
file37: All_20180423.vcf.gz | ||
#----------- 38 ------------- | ||
dbsnp38VcfDownload: https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/All_20180418.vcf.gz | ||
dbsnp38TbiDownload: https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/All_20180418.vcf.gz.tbi | ||
file38: All_20180418.vcf.gz | ||
|
||
genomeReference: | ||
#----------- 37 ------------- | ||
37Reference: http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/human_g1k_v37 | ||
file37: human_g1k_v37.fasta | ||
#----------- 38 ------------- | ||
38Reference: http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome/ | ||
file38: GRCh38_full_analysis_set_plus_decoy_hla.fa | ||
index38: GRCh38_full_analysis_set_plus_decoy_hla.fa.fai | ||
|
||
############################################# | ||
### --- dbSNP annotation file options --- ### | ||
# ----------------------------------------- # | ||
|
||
# used in Raw Sequence Module, Stored Sequence Module, SNP Array Module | ||
# the uncommented configuration options will be used for these modules. | ||
# switch the commented/uncommented three configuration lines if you would like to switch versions. | ||
# you may also update the FTP links if you would like a different dbSNP annotation file - but of course that comes with no warranties | ||
|
||
dbSNP: | ||
# FTP site: https://ftp.ncbi.nih.gov/snp/ | ||
# dbsnp all file | ||
# Check to see if you are using correct assembly with your project | ||
#----------- 37 ------------- | ||
# dbsnpVcfDownload: https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/VCF/All_20180423.vcf.gz | ||
# dbsnpTbiDownload: https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/VCF/All_20180423.vcf.gz.tbi | ||
# file: All_20180423.vcf.gz | ||
#----------- 38 ------------- | ||
dbsnpVcfDownload: https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/All_20180418.vcf.gz | ||
dbsnpTbiDownload: https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/All_20180418.vcf.gz.tbi | ||
file: All_20180418.vcf.gz | ||
|
||
##################################### | ||
##################################### | ||
##################################### | ||
|
||
# # # DEFAULT VARIABLES # # # | ||
|
||
##################################### | ||
##################################### | ||
##################################### | ||
|
||
# used in Raw Sequence Module, Stored Sequence Module, and SNP Array Module | ||
# Reference Genome Assembly | ||
ref: | ||
# ensembl species name | ||
species: homo_sapiens | ||
# ensembl release | ||
release: 104 | ||
# genome build | ||
build: GRCh38 | ||
|
||
# used in Raw Sequence Module and Stored Sequence Module | ||
# Annotation files for variant calling | ||
NYGC: | ||
# FTP Site: http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20190425_NYGC_GATK/ | ||
# annotations files are in GRCh38 assembly | ||
nygcUrlPath: http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20190425_NYGC_GATK/annotated/ | ||
nygcFileStart: CCDG_13607_B01_GRM_WGS_2019-02-19_chr | ||
nygcFileEnd: .recalibrated_variants.annotated.txt | ||
numberOfSplitRegionsFiles: 5 | ||
|
||
################################### | ||
### --- RAW SEQUENCE MODULE --- ### | ||
# ------------------------------- # | ||
|
||
# used to shorten the downloaded directories so file is directly placed in correct folder - will need to edit based on your FTP download path | ||
url: | ||
cutdirs: 5 | ||
|
||
###################################### | ||
### --- STORED SEQUENCE MODULE --- ### | ||
# ---------------------------------- # | ||
|
||
# used to shorten the downloaded directories so file is directly placed in correct folder - will need to edit based on your FTP download path | ||
cramUrl: | ||
cutdirs: 5 | ||
|
||
################################ | ||
### --- SNP ARRAY MODULE --- ### | ||
# ---------------------------- # | ||
|
||
urlProductFiles: | ||
# product files LINK | ||
# LINK: https://support.illumina.com/downloads/infinium-multi-ethnic-global-8-v1-product-files.html | ||
# manifest file LINK: update to 37 or 38, and make sure it's a BPM file | ||
|
||
#----------- 37 ------------- | ||
#manifest: https://webdata.illumina.com/downloads/productfiles/multiethnic-global-8/v1-0/infinium-multi-ethnic-global-8-d1-bpm.zip | ||
#mzip: infinium-multi-ethnic-global-8-d1-bpm.zip | ||
#filename: Multi-EthnicGlobal_D1.bpm # for expanding function later | ||
##build: D1 # for expanding function later | ||
|
||
#----------- 38 ------------- | ||
## LINK: ftp://ussd-ftp.illumina.com/downloads/productfiles/multiethnic-global-8/v1-0/build38 | ||
manifest: https://webdata.illumina.com/downloads/productfiles/multiethnic-global-8/v1-0/build38/multi-ethnic-global-8-d2-bpm.zip | ||
mzip: multi-ethnic-global-8-d2-bpm.zip | ||
#filename: Multi-EthnicGlobal_D2.bpm # for expanding function later | ||
##build: D2 # for expanding function later | ||
|
||
# cluster file LINK | ||
cluster: https://webdata.illumina.com/downloads/productfiles/multiethnic-global-8/v1-0/infinium-multi-ethnic-global-8-d1-cluster-file.zip | ||
czip: infinium-multi-ethnic-global-8-d1-cluster-file.zip | ||
|
||
urlSupportFiles: | ||
# support files LINK | ||
|
||
# LINK: https://support.illumina.com/downloads/infinium-multi-ethnic-global-8-v1-support-files.html | ||
|
||
#----------- 37 ------------- uncomment 37 section below if you need to use GRCh37 assembly | ||
|
||
# # physical and genetic coordinates for 37 | ||
#physicalGeneticCoordinates: https://support.illumina.com/content/dam/illumina-support/documents/downloads/productfiles/multiethnic-global/multi-ethnic-global-8-d1-physical-genetic-coordinates.zip | ||
#pzip: multi-ethnic-global-8-d1-physical-genetic-coordinates.zip # Multi-EthnicGlobal_D1.csv_Physical-and-Genetic-Coordinates.txt | ||
|
||
#----------- 38 ------------- comment block 38 section below if you need to use GRCh37 assembly | ||
|
||
# physical and genetic coordinates for 38 | ||
physicalGeneticCoordinates: https://support.illumina.com/content/dam/illumina-support/documents/downloads/productfiles/multiethnic-global/multi-ethnic-global-8-d2-physical-genetic-coordinates.zip | ||
pzip: multi-ethnic-global-8-d2-physical-genetic-coordinates.zip # Multi-EthnicGlobal_D2.csv_Physical-and-Genetic-Coordinates.txt | ||
|
||
# rsids conversion file - Loci Name to rsID | ||
rsidConversion: https://support.illumina.com/content/dam/illumina-support/documents/downloads/productfiles/multiethnic-global/multi-ethnic-global-8-d2-b150-rsids.zip | ||
rzip: multi-ethnic-global-8-d2-b150-rsids.zip | ||
rfile: Multi-EthnicGlobal_D2_b150_rsids.txt | ||
|
||
Illumina: | ||
# iaap-cli exe path | ||
ftpDownload: ftp://webdata2:webdata2@ussd-ftp.illumina.com/downloads/software/iaap/iaap-cli-linux-x64-1.1.0.tar.gz | ||
DownloadTarFile: iaap-cli-linux-x64-1.1.0.tar.gz | ||
Download: iaap-cli-linux-x64-1.1.0 | ||
iaapcli: iaap-cli | ||
#iaapcli: /N/project/WalshWGS/IliadGenomicDataPipeline/Iliad/target_workflow/illumina_gencall/AutoConvert2.0/AutoConvert | ||
|
||
################################ | ||
### --- SNP ARRAY MODULE --- ### | ||
### - QC VALUE THRESHOLDS - ### | ||
# ---------------------------- # | ||
|
||
QCarray: | ||
GenTrainUpperThreshold: 0.7 | ||
GenTrainLowerThreshold: 0.67 | ||
ClusterSepUpperThreshold: 0.45 | ||
ClusterSepLowerThreshold: 0.4 | ||
|
||
|
||
##################################### | ||
##################################### | ||
##################################### | ||
|
||
# # # S U B M O D U L E S # # # | ||
|
||
##################################### | ||
##################################### | ||
##################################### | ||
|
||
# The major submodule named - Lift-and-Merge - can be found above near line 101. | ||
# There are many configurations, checks, and automatic steps that may help users with little experience. | ||
# These more independent and small task workflows below may come in handy for some quick data maneuvers. | ||
|
||
MergerSub: | ||
|
||
LiftoverSub: | ||
# either point to a file in the config directory or enter one filename for the file that needs to be converted | ||
# Indicate which reference assembly you desire to switch your positions | ||
filename: Tatte-Demo | ||
desiredVersion: GRCh38 # switch to GRCh37 if you need to revert from 38 to 37 | ||
|
||
MergeTargetAndRef: |