Skip to content

Commit ca5382f

Browse files
committed
Updating testing with config files.
1 parent 8763a48 commit ca5382f

13 files changed

+1974
-22
lines changed

.github/workflows/python-app.yml

+22-22
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,28 @@ on:
99
pull_request:
1010
branches:
1111

12-
jobs:
13-
build:
14-
runs-on: ubuntu-latest
15-
16-
steps:
17-
- uses: actions/checkout@v3
18-
- uses: s-weigand/setup-conda@v1
19-
with:
20-
conda-channels: bioconda, conda-forge
21-
activate-conda: true
22-
repository: NCSA/NEAT
23-
- name: basic test
24-
run: |
25-
conda env create -f environment.yml -n test_neat
26-
conda activate test_neat
27-
poetry install
28-
neat
29-
30-
- name: run coverage tests
31-
run: |
32-
conda activate test_neat
33-
python tests/coverage_tests.py
12+
#jobs:
13+
# build:
14+
# runs-on: ubuntu-latest
15+
#
16+
# steps:
17+
# - uses: actions/checkout@v3
18+
# - uses: s-weigand/setup-conda@v1
19+
# with:
20+
# conda-channels: bioconda, conda-forge
21+
# activate-conda: true
22+
# repository: NCSA/NEAT
23+
# - name: basic test
24+
# run: |
25+
# conda env create -f environment.yml -n test_neat
26+
# conda activate test_neat
27+
# poetry install
28+
# neat
29+
#
30+
# - name: run coverage tests
31+
# run: |
32+
# conda activate test_neat
33+
# python tests/coverage_tests.py
3434

3535
# - name: lint with flake8
3636
# run: |

config_template/config_test1.yml

+177
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# Test 1: Default parameters, H1N1 data
2+
3+
## Template for gen_reads parallel
4+
## Any parameter that is not required but has a default value will use the
5+
## default value even if the variable is not included in the config. For
6+
## required items, they must be included in the config and must be given a value.
7+
## All other items can be present or not. If present and the value is set to a single
8+
## period, the variable will be treated as though it had been omitted. Please do
9+
## not modify this template, but instead make a copy in your working directory. Done this
10+
## way, you can run without even needing to declare -c.
11+
12+
# Absolute path to input reference fasta file
13+
# type = string | required: yes
14+
reference: /user/path/NEAT/data/H1N1.fa
15+
16+
# How to partition the reference for analysis. By default, NEAT will
17+
# attempt to process one contig per thread. However, if you have very
18+
# large fasta files, you will see additional runtime benefit from choosing
19+
# the subdivision method, which will split the contigs up into equal sizes
20+
# for processing. If you need further speedups and have access to a distributed system
21+
# you can use a shell script wrapper around NEAT to split the fasta into
22+
# contigs, then join the results later. NEAT does not feature translocations, so
23+
# this will not affect NEAT's output. Note that subdivision will only activate for
24+
# number of threads > 1.
25+
# type = string | required: no | default = chrom | possible values: chrom, subdivision
26+
partition_mode: .
27+
28+
# Read length of the reads in the fastq output. Only required if @produce_fastq is set to true
29+
# type = int | required: no | default = 101
30+
read_len: .
31+
32+
# Number of threads to request for NEAT. The recommended amount is the number of chromosomes in
33+
# your input fasta plus 1.
34+
# type = int | required: no | default = 1
35+
threads: .
36+
37+
# Average Coverage for the entire genome.
38+
# type = float | required: no | default = 10.0
39+
coverage: .
40+
41+
# Absolute path to file with sequencing error model
42+
# type = string | required: no | default: <NEAT_DIR>/neat/models/defaults/default_error_model.pickle.gz
43+
error_model: .
44+
45+
# Average sequencing error rate for the sequencing machine
46+
# type = float | required = no | must be between 0.0 and 0.3
47+
avg_seq_error: .
48+
49+
# This scales the quality scores to match the desired average sequencing error rate
50+
# specified by avg_seq_error.
51+
# type: boolean | required = no | default = false
52+
rescale_qualities: .
53+
54+
# This is the factor to add to the quality scores to get the ascii text version of the
55+
# score. The default follows the Sanger quality offset.
56+
# type: int | required = no | default = 33
57+
quality_offset: .
58+
59+
# Desired ploidy
60+
# type = int | required = no | default = 2
61+
ploidy: .
62+
63+
# Absolute path to vcf file containing variants that will always be included, regardless
64+
# of genotype and filter. You can pre-filter your vcf for these fields before inputting it
65+
# if this is not the desired behavior.
66+
# type: string | required = no
67+
input_variants: .
68+
69+
# Absolute path to bed file containing reference regions that the simulation
70+
# should target.
71+
# type = string | required = no
72+
target_bed: .
73+
74+
# Scalar value for coverage in regions outside the targeted bed. Example 0.5
75+
# would get you roughly half the coverage of the on-target areas. Default is
76+
# 2% of total coverage in off-target regions.
77+
# type: float | required = no | default = 0.02
78+
off_target_scalar: .
79+
80+
# Whether to discard areas outside the targeted bed region. By default, this is set
81+
# to false and NEAT will use a different model for off-target regions but still
82+
# include them in the final output.
83+
# TODO this may not be necessary
84+
# type: boolean | required = no | default = false
85+
discard_offtarget: .
86+
87+
# Absolute path to bed file containing reference regions that the simulation
88+
# should discard.
89+
# type = string | required = no
90+
discard_bed: .
91+
92+
# Absolute path to the mutation model pickle file. Omitting this value will cause
93+
# NEAT to use the default model, with some standard parameters, and generally uniform biases.
94+
# type: string | required = no
95+
mutation_model: .
96+
97+
# Average mutation rate per base pair. Overall average is 0.001, or model default
98+
# Use this value to override the mutation rate for the default or input model.
99+
# type: float | required = no | must be between 0.0 and 0.3
100+
mutation_rate: .
101+
102+
# Absolute path to a bed file with mutation rates by region.
103+
# Rates must be in the fourth column and be of the form "mut_rate=x.xx"
104+
# Rates must be between 0.00 and 0.03
105+
# type: string | required = no
106+
mutation_bed: .
107+
108+
# Absolute path to GC content model generated by compute_gc.py
109+
# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_gc_bias_model.pickle.gz
110+
gc_model: .
111+
112+
# Whether the output should be paired ended. For certain conditions (i.e., vcf only or
113+
# fasta only), this will be ignored. If this is true, then there must be an included fragment
114+
# length model output from runner.py or a mean and standard deviation
115+
# by declaring values for @fragment_mean and @fragment_st_dev.
116+
# type: boolean | required = no | default = false
117+
paired_ended: .
118+
119+
# Absolute path to a pickle file containing the fragment length model output
120+
# from runner.py.
121+
# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_fraglen_model.pickle.gz
122+
fragment_model: .
123+
124+
# Mean for the paired end fragment length. This only applies if paired_ended is set to true.
125+
# This number will form the mean for the sample distribution of the fragment lengths in the simulation
126+
# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used.
127+
# type: float | required: no (unless paired-ended)
128+
fragment_mean: .
129+
130+
# Standard deviation for the paired end fragment length. This only applies if paired_ended is set to true.
131+
# This number will form the standard deviation about the mean specified above for the sample distribution
132+
# of the fragment lengths in the simulation.
133+
# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used.
134+
# type: float | required: no (unless paired-ended)
135+
fragment_st_dev: .
136+
137+
# Whether to produce the golden bam file. This file will contain the reads
138+
# aligned with the exact region of the genome
139+
# type: boolean | required = no | default = false
140+
produce_bam: .
141+
142+
# Whether to produce a vcf file containing all the mutation errors added
143+
# by NEAT.
144+
# type: boolean | required = no | default = false
145+
produce_vcf: .
146+
147+
# Whether to output the mutated fasta. This will output a fasta file with mutations
148+
# inserted. It does not include sequencing errors or read information. Useful for
149+
# multigenerational mutations.
150+
# type: boolean | required = no | default = false
151+
produce_fasta: .
152+
153+
# Whether to output the fastq(s) of the reads. This is the default output. NEAT
154+
# will produce 1 fastq for single ended reads or 2 fastqs for paired ended.
155+
# type: boolean | required = no | default = true
156+
produce_fastq: .
157+
158+
# If set to true, this will ignore statistical models and force coverage to be
159+
# constant across the genome. This is considered a debugging feature.
160+
# type: boolean | required = no | default = false
161+
no_coverage_bias: .
162+
163+
# Set an RNG seed value. Runs using identical RNG values should produce identical results
164+
# so things like read locations, variant positions, error positions, etc. should be the same.
165+
# Useful for debugging.
166+
# type: int | required = no
167+
rng_seed: .
168+
169+
# Set an absolute minimum number of mutations. The program always adds at least 1 mutation.
170+
# Useful for very small datasets.
171+
# type: int | required = no
172+
min_mutations: .
173+
174+
# Overwrite the output files, if they are named the same as the current run.
175+
# Default is to quit if files already exist to avoid data destruction
176+
# type: bool | required = no | default = false
177+
overwrite_output: True

0 commit comments

Comments
 (0)