Skip to content

Commit 66ab859

Browse files
Merge pull request #109 from ncsa/develop
Develop
2 parents 2c80a44 + c8bf73d commit 66ab859

14 files changed

+2035
-49
lines changed

.github/workflows/python-app.yml

+42-34
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,61 @@
1-
# This workflow will install Python dependencies, run tests and lint with a single version of Python
2-
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
1+
# This workflow sets up the environment and runs the NEAT read-simulator once for each test configuration file, using relative paths
2+
# For more information on using Python with GitHub Actions, refer to:
3+
# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
34

4-
name: NEAT unit tests
5+
name: NEAT Unit Tests
56

67
on:
78
push:
8-
branches: [ "main", "develop" ]
9+
branches: [develop, main]
910
pull_request:
10-
branches: [ "main" ]
11+
branches: [main]
1112

1213
jobs:
13-
build:
14+
detailed_test_execution:
1415
runs-on: ubuntu-latest
15-
1616
steps:
1717
- uses: actions/checkout@v3
1818
- uses: s-weigand/setup-conda@v1.2.1
1919
with:
20-
conda-channels: bioconda, conda-forge
20+
conda-channels: [bioconda, conda-forge]
2121
activate-conda: true
2222
repository: NCSA/NEAT
23-
- name: basic test
23+
- name: Environment Setup
2424
run: |
2525
conda env create -f environment.yml -n test_neat
2626
conda activate test_neat
2727
poetry install
28-
neat
28+
cd config_template
2929
30-
- name: run coverage tests
31-
run: |
32-
conda activate test_neat
33-
python tests/coverage_tests.py
34-
35-
# - name: lint with flake8
36-
# run: |
37-
# conda activate neat
38-
# # stop the build if there are Python syntax errors or undefined names
39-
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
40-
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
41-
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
42-
# - name: Execute test_gen_reads
43-
# run: |
44-
# conda activate neat
45-
# cd ${{ github.workspace }}
46-
# poetry install
47-
# neat --log-level ERROR --no-log read-simulator -c data/test_config.yml -o test
48-
# - run: echo "This job's status is ${{ job.status }}."
49-
# - name: Execute seq_err_model_test
50-
# run: |
51-
# cd ${{ github.workspace }}
52-
# neat --log-level ERROR --no-log model-seq-err -i data/baby.fastq
53-
# - run: echo "This job's status is ${{ job.status }}."
30+
- name: Run NEAT Simulation for config_test1
31+
run: python -m neat --log-level DEBUG read-simulator -c config_test1.yml -o ../outputs/test1_read-simulator
32+
33+
- name: Run NEAT Simulation for config_test2
34+
run: python -m neat --log-level DEBUG read-simulator -c config_test2.yml -o ../outputs/test2_read-simulator
35+
36+
- name: Run NEAT Simulation for config_test3
37+
run: python -m neat --log-level DEBUG read-simulator -c config_test3.yml -o ../outputs/test3_read-simulator
38+
39+
- name: Run NEAT Simulation for config_test4
40+
run: python -m neat --log-level DEBUG read-simulator -c config_test4.yml -o ../outputs/test4_read-simulator
41+
42+
- name: Run NEAT Simulation for config_test5
43+
run: python -m neat --log-level DEBUG read-simulator -c config_test5.yml -o ../outputs/test5_read-simulator
44+
45+
- name: Run NEAT Simulation for config_test6
46+
run: python -m neat --log-level DEBUG read-simulator -c config_test6.yml -o ../outputs/test6_read-simulator
47+
48+
- name: Run NEAT Simulation for config_test7
49+
run: python -m neat --log-level DEBUG read-simulator -c config_test7.yml -o ../outputs/test7_read-simulator
50+
51+
- name: Run NEAT Simulation for config_test8
52+
run: python -m neat --log-level DEBUG read-simulator -c config_test8.yml -o ../outputs/test8_read-simulator
53+
54+
- name: Run NEAT Simulation for config_test9
55+
run: python -m neat --log-level DEBUG read-simulator -c config_test9.yml -o ../outputs/test9_read-simulator
56+
57+
- name: Run NEAT Simulation for config_test10
58+
run: python -m neat --log-level DEBUG read-simulator -c config_test10.yml -o ../outputs/test10_read-simulator
59+
60+
- name: Run NEAT Simulation for config_test11
61+
run: python -m neat --log-level DEBUG read-simulator -c config_test11.yml -o ../outputs/test11_read-simulator

config_template/config_test1.yml

+177
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# Test 1: Default parameters, H1N1 data
2+
3+
## Template for gen_reads parallel
4+
## Any parameter that is not required but has a default value will use the
5+
## default value even if the variable is not included in the config. For
6+
## required items, they must be included in the config and they must be given a value.
7+
## All other items can be present or not. If present and the value is set to a single
8+
## period, the variable will be treated as though it had been omitted. Please do
9+
## not modify this template, but instead make a copy in your working directory. Done this
10+
## way, you can run without even needing to declare -c.
11+
12+
# Absolute path to input reference fasta file
13+
# type = string | required: yes
14+
reference: ../data/H1N1.fa
15+
16+
# How to partition the reference for analysis. By default, NEAT will
17+
# attempt to process one contig per thread. However, if you have very
18+
# large fasta files, you will see additional runtime benefit from choosing
19+
# the subdivision method, which will split the contigs up into equal sizes
20+
# for processing. If you need further speedups and have access to a distributed system
21+
# you can use a shell script wrapper around NEAT to split the fasta into
22+
# contigs, then join the results later. NEAT does not feature translocations, so
23+
# this will not affect NEAT's output. Note that subdivision will only activate for
24+
# number of threads > 1.
25+
# type = string | required: no | default = chrom | possible values: chrom, subdivision
26+
partition_mode: .
27+
28+
# Read length of the reads in the fastq output. Only required if @produce_fastq is set to true
29+
# type = int | required: no | default = 101
30+
read_len: .
31+
32+
# Number of threads to request for NEAT. The recommended amount is the number of chromosomes in
33+
# your input fasta plus 1.
34+
# type = int | required: no | default = 1
35+
threads: .
36+
37+
# Average Coverage for the entire genome.
38+
# type = float | required: no | default = 10.0
39+
coverage: .
40+
41+
# Absolute path to file with sequencing error model
42+
# type = string | required: no | default: <NEAT_DIR>/neat/models/defaults/default_error_model.pickle.gz
43+
error_model: .
44+
45+
# Average sequencing error rate for the sequencing machine
46+
# type = float | required = no | must be between 0.0 and 0.3
47+
avg_seq_error: .
48+
49+
# This scales the quality scores to match the desired average sequencing error rate
50+
# specified by avg_seq_error.
51+
# type: boolean | required = no | default = false
52+
rescale_qualities: .
53+
54+
# This is the factor to add to the quality scores to get the ascii text version of the
55+
# score. The default follows the sanger quality offset
56+
# type: int | required = no | default = 33
57+
quality_offset: .
58+
59+
# Desired ploidy
60+
# type = int | required = no | default = 2
61+
ploidy: .
62+
63+
# Absolute path to vcf file containing variants that will always be included, regardless
64+
# of genotype and filter. You can pre-filter your vcf for these fields before inputting it
65+
# if this is not the desired behavior.
66+
# type: string | required = no
67+
input_variants: .
68+
69+
# Absolute path to bed file containing reference regions that the simulation
70+
# should target.
71+
# type = string | required = no
72+
target_bed: .
73+
74+
# Scalar value for coverage in regions outside the targeted bed. Example 0.5
75+
# would get you roughly half the coverage as the on target areas. Default is
76+
# 2% of total coverage in off-target regions.
77+
# type: float | required = no | default = 0.02
78+
off_target_scalar: .
79+
80+
# Whether to discard areas outside the targeted bed region. By default, this is set
81+
# to false and NEAT will use a different model for off-target regions but still
82+
# include them in the final output.
83+
# TODO this may not be necessary
84+
# type: boolean | required = no | default = false
85+
discard_offtarget: .
86+
87+
# Absolute path to bed file containing reference regions that the simulation
88+
# should discard.
89+
# type = string | required = no
90+
discard_bed: .
91+
92+
# Absolute path to the mutation model pickle file. Omitting this value will cause
93+
# NEAT to use the default model, with some standard parameters, and generally uniform biases.
94+
# type: string | required = no
95+
mutation_model: .
96+
97+
# Average mutation rate per base pair. Overall average is 0.001, or model default
98+
# Use this value to override the mutation rate of either the default or the input model.
99+
# type: float | required = no | must be between 0.0 and 0.3
100+
mutation_rate: .
101+
102+
# Absolute path to a bed file with mutation rates by region.
103+
# Rates must be in the fourth column and be of the form "mut_rate=x.xx"
104+
# Rates must be between 0.00 and 0.03
105+
# type: string | required = no
106+
mutation_bed: .
107+
108+
# Absolute path to GC content model generated by compute_gc.py
109+
# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_gc_bias_model.pickle.gz
110+
gc_model: .
111+
112+
# Whether the output should be paired ended. For certain conditions (i.e., vcf only or
113+
# fasta only), this will be ignored. If this is true, then there must be an included fragment
114+
# length model output from runner.py or a mean and standard deviation
115+
# by declaring values for @fragment_mean and @fragment_st_dev.
116+
# type: boolean | required = no | default = false
117+
paired_ended: .
118+
119+
# Absolute path to a pickle file containing the fragment length model output
120+
# from runner.py.
121+
# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_fraglen_model.pickle.gz
122+
fragment_model: .
123+
124+
# Mean for the paired end fragment length. This only applies if paired-ended is set to true.
125+
# This number will form the mean for the sample distribution of the fragment lengths in the simulation
126+
# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used.
127+
# type: float | required: no (unless paired-ended)
128+
fragment_mean: .
129+
130+
# Standard deviation for the paired end fragment length. This only applies if paired-ended is set to true.
131+
# This number will form the standard deviation about the mean specified above for the sample distribution
132+
# of the fragment lengths in the simulation.
133+
# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used.
134+
# type: float | required: no (unless paired-ended)
135+
fragment_st_dev: .
136+
137+
# Whether to produce the golden bam file. This file will contain the reads
138+
# aligned with the exact region of the genome
139+
# type: boolean | required = no | default = false
140+
produce_bam: .
141+
142+
# Whether to produce a vcf file containing all the mutation errors added
143+
# by NEAT.
144+
# type: boolean | required = no | default = false
145+
produce_vcf: .
146+
147+
# Whether to output the mutated fasta. This will output a fasta file with mutations
148+
# inserted. It does not include sequencing errors or read information. Useful for
149+
# multigenerational mutations.
150+
# type: boolean | required = no | default = false
151+
produce_fasta: .
152+
153+
# Whether to output the fastq(s) of the reads. This is the default output. NEAT
154+
# will produce 1 fastq for single ended reads or 2 fastqs for paired ended.
155+
# type: boolean | required = no | default = true
156+
produce_fastq: .
157+
158+
# If set to true, this will ignore statistical models and force coverage to be
159+
# constant across the genome. This is considered a debugging feature.
160+
# type: boolean | required = no | default = false
161+
no_coverage_bias: .
162+
163+
# Set an RNG seed value. Runs using identical RNG values should produce identical results
164+
# so things like read locations, variant positions, error positions, etc. should be the same.
165+
# Useful for debugging.
166+
# type: int | required = no
167+
rng_seed: .
168+
169+
# Set an absolute minimum number of mutations. The program always adds at least 1 mutation.
170+
# Useful for very small datasets.
171+
# type: int | required = no
172+
min_mutations: .
173+
174+
# Overwrite the output files, if they are named the same as the current run.
175+
# Default is to quit if files already exist to avoid data destruction
176+
# type: bool | required = no | default = false
177+
overwrite_output: True

0 commit comments

Comments
 (0)