|
| 1 | +# Test 1: Default parameters, H1N1 data |
| 2 | + |
| 3 | +## Template for gen_reads parallel |
| 4 | +## Any parameter that is not required but has a default value will use the |
| 5 | +## default value even if the variable is not included in the config. For |
| 6 | +## required items, they must be included in the config and they must be given a value. |
| 7 | +## All other items can be present or not. If present and the value is set to a single |
| 8 | +## period, the variable will be treated as though it had been omitted. Please do |
| 9 | +## not modify this template, but instead make a copy in your working directory. Done this |
| 10 | +## way, you can run without even needing to declare -c. |
| 11 | + |
| 12 | +# Absolute path to input reference fasta file |
| 13 | +# type = string | required: yes |
| 14 | +reference: /user/path/NEAT/data/H1N1.fa |
| 15 | + |
| 16 | +# How to partition the reference for analysis. By default, NEAT will |
| 17 | +# attempt to process one contig per thread. However, if you have very |
| 18 | +# large fasta files, you will see additional runtime benefit from choosing |
| 19 | +# the subdivision method, which will split the contigs up into equal sizes |
| 20 | +# for processing. If you need further speedups and have access to a distributed system |
| 21 | +# you can use a shell script wrapper around NEAT to split the fasta into |
| 22 | +# contigs, then join the results later. NEAT does not feature translocations, so |
| 23 | +# this will not affect NEAT's output. Note that subdivision will only activate for |
| 24 | +# number of threads > 1. |
| 25 | +# type = string | required: no | default = chrom | possible values: chrom, subdivision |
| 26 | +partition_mode: . |
| 27 | + |
| 28 | +# Read length of the reads in the fastq output. Only required if @produce_fastq is set to true |
| 29 | +# type = int | required: no | default = 101 |
| 30 | +read_len: . |
| 31 | + |
| 32 | +# Number of threads to request for NEAT. The recommended amount is the number of chromosomes in |
| 33 | +# your input fasta plus 1. |
| 34 | +# type = int | required: no | default = 1 |
| 35 | +threads: . |
| 36 | + |
| 37 | +# Average Coverage for the entire genome. |
| 38 | +# type = float | required: no | default = 10.0 |
| 39 | +coverage: . |
| 40 | + |
| 41 | +# Absolute path to file with sequencing error model |
| 42 | +# type = string | required: no | default: <NEAT_DIR>/neat/models/defaults/default_error_model.pickle.gz |
| 43 | +error_model: . |
| 44 | + |
| 45 | +# Average sequencing error rate for the sequencing machine |
| 46 | +# type = float | required = no | must be between 0.0 and 0.3 |
| 47 | +avg_seq_error: . |
| 48 | + |
| 49 | +# This scales the quality scores to match the desired average sequencing error rate |
| 50 | +# specified by avg_seq_error. |
| 51 | +# type: boolean | required = no | default = false |
| 52 | +rescale_qualities: . |
| 53 | + |
| 54 | +# This is the factor to add to the quality scores to get the ascii text version of the |
| 55 | +# score. The default follows the sanger quality offset |
| 56 | +# type: int | required = no | default = 33 |
| 57 | +quality_offset: . |
| 58 | + |
| 59 | +# Desired ploidy |
| 60 | +# type = int | required = no | default = 2 |
| 61 | +ploidy: . |
| 62 | + |
| 63 | +# Absolute path to vcf file containing variants that will always be included, regardless |
| 64 | +# of genotype and filter. You can pre-filter your vcf for these fields before inputting it |
| 65 | +# if this is not the desired behavior. |
| 66 | +# type: string | required = no |
| 67 | +input_variants: . |
| 68 | + |
| 69 | +# Absolute path to bed file containing reference regions that the simulation |
| 70 | +# should target. |
| 71 | +# type = string | required = no |
| 72 | +target_bed: . |
| 73 | + |
| 74 | +# Scalar value for coverage in regions outside the targeted bed. Example 0.5 |
| 75 | +# would get you roughly half the coverage as the on target areas. Default is |
| 76 | +# 2% of total coverage in off-target regions. |
| 77 | +# type: float | required = no | default = 0.02 |
| 78 | +off_target_scalar: . |
| 79 | + |
| 80 | +# Whether to discard areas outside the targeted bed region. By default, this is set |
| 81 | +# to false and NEAT will use a different model for off-target regions but still |
| 82 | +# include them in the final output. |
| 83 | +# TODO this may not be necessary |
| 84 | +# type: boolean | required = no | default = false |
| 85 | +discard_offtarget: . |
| 86 | + |
| 87 | +# Absolute path to bed file containing reference regions that the simulation |
| 88 | +# should discard. |
| 89 | +# type = string | required = no |
| 90 | +discard_bed: . |
| 91 | + |
| 92 | +# Absolute path to the mutation model pickle file. Omitting this value will cause |
| 93 | +# NEAT to use the default model, with some standard parameters, and generally uniform biases. |
| 94 | +# type: string | required = no |
| 95 | +mutation_model: . |
| 96 | + |
| 97 | +# Average mutation rate per base pair. Overall average is 0.001, or the model default. |
| 98 | +# Use this value to override the mutation rate for the default or input model. |
| 99 | +# type: float | required = no | must be between 0.0 and 0.3 |
| 100 | +mutation_rate: . |
| 101 | + |
| 102 | +# Absolute path to a bed file with mutation rates by region. |
| 103 | +# Rates must be in the fourth column and be of the form "mut_rate=x.xx" |
| 104 | +# Rates must be between 0.00 and 0.03 |
| 105 | +# type: string | required = no |
| 106 | +mutation_bed: . |
| 107 | + |
| 108 | +# Absolute path to GC content model generated by compute_gc.py |
| 109 | +# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_gc_bias_model.pickle.gz |
| 110 | +gc_model: . |
| 111 | + |
| 112 | +# Whether the output should be paired ended. For certain conditions (i.e., vcf only or |
| 113 | +# fasta only), this will be ignored. If this is true, then you must either include a fragment |
| 114 | +# length model output from runner.py or specify a mean and standard deviation |
| 115 | +# by declaring values for @fragment_mean and @fragment_st_dev. |
| 116 | +# type: boolean | required = no | default = false |
| 117 | +paired_ended: . |
| 118 | + |
| 119 | +# Absolute path to a pickle file containing the fragment length model output |
| 120 | +# from runner.py. |
| 121 | +# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_fraglen_model.pickle.gz |
| 122 | +fragment_model: . |
| 123 | + |
| 124 | +# Mean for the paired end fragment length. This only applies if paired-ended is set to true. |
| 125 | +# This number will form the mean for the sample distribution of the fragment lengths in the simulation |
| 126 | +# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used. |
| 127 | +# type: float | required: no (unless paired-ended) |
| 128 | +fragment_mean: . |
| 129 | + |
| 130 | +# Standard deviation for the paired end fragment length. This only applies if paired-ended is set to true. |
| 131 | +# This number will form the standard deviation about the mean specified above for the sample distribution |
| 132 | +# of the fragment lengths in the simulation. |
| 133 | +# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used. |
| 134 | +# type: float | required: no (unless paired-ended) |
| 135 | +fragment_st_dev: . |
| 136 | + |
| 137 | +# Whether to produce the golden bam file. This file will contain the reads |
| 138 | +# aligned with the exact region of the genome |
| 139 | +# type: boolean | required = no | default = false |
| 140 | +produce_bam: . |
| 141 | + |
| 142 | +# Whether to produce a vcf file containing all the mutation errors added |
| 143 | +# by NEAT. |
| 144 | +# type: boolean | required = no | default = false |
| 145 | +produce_vcf: . |
| 146 | + |
| 147 | +# Whether to output the mutated fasta. This will output a fasta file with mutations |
| 148 | +# inserted. It does not include sequencing errors or read information. Useful for |
| 149 | +# multigenerational mutations. |
| 150 | +# type: boolean | required = no | default = false |
| 151 | +produce_fasta: . |
| 152 | + |
| 153 | +# Whether to output the fastq(s) of the reads. This is the default output. NEAT |
| 154 | +# will produce 1 fastq for single ended reads or 2 fastqs for paired ended. |
| 155 | +# type: boolean | required = no | default = true |
| 156 | +produce_fastq: . |
| 157 | + |
| 158 | +# If set to true, this will ignore statistical models and force coverage to be |
| 159 | +# constant across the genome. This is considered a debugging feature. |
| 160 | +# type: boolean | required = no | default = false |
| 161 | +no_coverage_bias: . |
| 162 | + |
| 163 | +# Set an RNG seed value. Runs using identical RNG values should produce identical results |
| 164 | +# so things like read locations, variant positions, error positions, etc. should be the same. |
| 165 | +# Useful for debugging. |
| 166 | +# type: int | required = no |
| 167 | +rng_seed: . |
| 168 | + |
| 169 | +# Set an absolute minimum number of mutations. The program always adds at least 1 mutation. |
| 170 | +# Useful for very small datasets. |
| 171 | +# type: int | required = no |
| 172 | +min_mutations: . |
| 173 | + |
| 174 | +# Overwrite the output files, if they are named the same as the current run. |
| 175 | +# Default is to quit if files already exist to avoid data destruction |
| 176 | +# type: boolean | required = no | default = false |
| 177 | +overwrite_output: True |
0 commit comments